arkindex-base-worker 0.3.6rc5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.3.6rc5.dist-info → arkindex_base_worker-0.3.7.dist-info}/METADATA +14 -13
- arkindex_base_worker-0.3.7.dist-info/RECORD +47 -0
- {arkindex_base_worker-0.3.6rc5.dist-info → arkindex_base_worker-0.3.7.dist-info}/WHEEL +1 -1
- {arkindex_base_worker-0.3.6rc5.dist-info → arkindex_base_worker-0.3.7.dist-info}/top_level.txt +2 -0
- arkindex_worker/cache.py +14 -0
- arkindex_worker/image.py +29 -19
- arkindex_worker/models.py +14 -2
- arkindex_worker/utils.py +17 -3
- arkindex_worker/worker/__init__.py +122 -125
- arkindex_worker/worker/base.py +24 -24
- arkindex_worker/worker/classification.py +18 -25
- arkindex_worker/worker/dataset.py +24 -18
- arkindex_worker/worker/element.py +45 -6
- arkindex_worker/worker/entity.py +35 -4
- arkindex_worker/worker/metadata.py +21 -11
- arkindex_worker/worker/training.py +13 -0
- arkindex_worker/worker/transcription.py +45 -5
- arkindex_worker/worker/version.py +22 -0
- hooks/pre_gen_project.py +3 -0
- tests/conftest.py +14 -6
- tests/test_base_worker.py +0 -6
- tests/test_dataset_worker.py +291 -409
- tests/test_elements_worker/test_classifications.py +365 -539
- tests/test_elements_worker/test_cli.py +1 -1
- tests/test_elements_worker/test_dataset.py +97 -116
- tests/test_elements_worker/test_elements.py +227 -61
- tests/test_elements_worker/test_entities.py +22 -2
- tests/test_elements_worker/test_metadata.py +53 -27
- tests/test_elements_worker/test_training.py +35 -0
- tests/test_elements_worker/test_transcriptions.py +149 -16
- tests/test_elements_worker/test_worker.py +19 -6
- tests/test_image.py +37 -0
- tests/test_utils.py +23 -1
- worker-demo/tests/__init__.py +0 -0
- worker-demo/tests/conftest.py +32 -0
- worker-demo/tests/test_worker.py +12 -0
- worker-demo/worker_demo/__init__.py +6 -0
- worker-demo/worker_demo/worker.py +19 -0
- arkindex_base_worker-0.3.6rc5.dist-info/RECORD +0 -41
- {arkindex_base_worker-0.3.6rc5.dist-info → arkindex_base_worker-0.3.7.dist-info}/LICENSE +0 -0
|
@@ -179,6 +179,41 @@ def test_validate_model_version_not_created(mock_training_worker):
|
|
|
179
179
|
mock_training_worker.validate_model_version(hash="a", size=1, archive_hash="b")
|
|
180
180
|
|
|
181
181
|
|
|
182
|
+
@pytest.mark.parametrize("status_code", [403, 500])
|
|
183
|
+
def test_validate_model_version_catch_errors(
|
|
184
|
+
mocker, mock_training_worker, caplog, status_code
|
|
185
|
+
):
|
|
186
|
+
mocker.patch(
|
|
187
|
+
"arkindex_worker.worker.base.BaseWorker.request.retry.retry", return_value=False
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
mock_training_worker.model_version = {"id": "model_version_id"}
|
|
191
|
+
args = {
|
|
192
|
+
"hash": "hash",
|
|
193
|
+
"archive_hash": "archive_hash",
|
|
194
|
+
"size": 30,
|
|
195
|
+
}
|
|
196
|
+
mock_training_worker.api_client.add_error_response(
|
|
197
|
+
"ValidateModelVersion",
|
|
198
|
+
id="model_version_id",
|
|
199
|
+
status_code=status_code,
|
|
200
|
+
body=args,
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
mock_training_worker.validate_model_version(**args)
|
|
204
|
+
assert mock_training_worker.model_version == {"id": "model_version_id"}
|
|
205
|
+
assert [
|
|
206
|
+
(level, message)
|
|
207
|
+
for module, level, message in caplog.record_tuples
|
|
208
|
+
if module == "arkindex_worker"
|
|
209
|
+
] == [
|
|
210
|
+
(
|
|
211
|
+
logging.WARNING,
|
|
212
|
+
"An error occurred while validating model version model_version_id, please check its status.",
|
|
213
|
+
),
|
|
214
|
+
]
|
|
215
|
+
|
|
216
|
+
|
|
182
217
|
@pytest.mark.parametrize("deletion_failed", [True, False])
|
|
183
218
|
def test_validate_model_version_hash_conflict(
|
|
184
219
|
mock_training_worker, default_model_version, caplog, deletion_failed
|
|
@@ -1711,11 +1711,29 @@ def test_list_transcriptions_wrong_recursive(mock_elements_worker):
|
|
|
1711
1711
|
)
|
|
1712
1712
|
|
|
1713
1713
|
|
|
1714
|
-
def
|
|
1714
|
+
def test_list_transcriptions_wrong_worker_run(mock_elements_worker):
|
|
1715
1715
|
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
1716
1716
|
|
|
1717
1717
|
with pytest.raises(
|
|
1718
|
-
AssertionError, match="
|
|
1718
|
+
AssertionError, match="worker_run should be of type str or bool"
|
|
1719
|
+
):
|
|
1720
|
+
mock_elements_worker.list_transcriptions(
|
|
1721
|
+
element=elt,
|
|
1722
|
+
worker_run=1234,
|
|
1723
|
+
)
|
|
1724
|
+
|
|
1725
|
+
|
|
1726
|
+
def test_list_transcriptions_wrong_worker_version(mock_elements_worker):
|
|
1727
|
+
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
1728
|
+
|
|
1729
|
+
# WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
|
|
1730
|
+
with (
|
|
1731
|
+
pytest.deprecated_call(
|
|
1732
|
+
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
|
|
1733
|
+
),
|
|
1734
|
+
pytest.raises(
|
|
1735
|
+
AssertionError, match="worker_version should be of type str or bool"
|
|
1736
|
+
),
|
|
1719
1737
|
):
|
|
1720
1738
|
mock_elements_worker.list_transcriptions(
|
|
1721
1739
|
element=elt,
|
|
@@ -1723,11 +1741,30 @@ def test_list_transcriptions_wrong_worker_version(mock_elements_worker):
|
|
|
1723
1741
|
)
|
|
1724
1742
|
|
|
1725
1743
|
|
|
1726
|
-
def
|
|
1744
|
+
def test_list_transcriptions_wrong_bool_worker_run(mock_elements_worker):
|
|
1727
1745
|
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
1728
1746
|
|
|
1729
1747
|
with pytest.raises(
|
|
1730
|
-
AssertionError, match="if of type bool,
|
|
1748
|
+
AssertionError, match="if of type bool, worker_run can only be set to False"
|
|
1749
|
+
):
|
|
1750
|
+
mock_elements_worker.list_transcriptions(
|
|
1751
|
+
element=elt,
|
|
1752
|
+
worker_run=True,
|
|
1753
|
+
)
|
|
1754
|
+
|
|
1755
|
+
|
|
1756
|
+
def test_list_transcriptions_wrong_bool_worker_version(mock_elements_worker):
|
|
1757
|
+
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
1758
|
+
|
|
1759
|
+
# WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
|
|
1760
|
+
with (
|
|
1761
|
+
pytest.deprecated_call(
|
|
1762
|
+
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
|
|
1763
|
+
),
|
|
1764
|
+
pytest.raises(
|
|
1765
|
+
AssertionError,
|
|
1766
|
+
match="if of type bool, worker_version can only be set to False",
|
|
1767
|
+
),
|
|
1731
1768
|
):
|
|
1732
1769
|
mock_elements_worker.list_transcriptions(
|
|
1733
1770
|
element=elt,
|
|
@@ -1784,6 +1821,7 @@ def test_list_transcriptions(responses, mock_elements_worker):
|
|
|
1784
1821
|
"text": "hey",
|
|
1785
1822
|
"confidence": 0.42,
|
|
1786
1823
|
"worker_version_id": "56785678-5678-5678-5678-567856785678",
|
|
1824
|
+
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
1787
1825
|
"element": None,
|
|
1788
1826
|
},
|
|
1789
1827
|
{
|
|
@@ -1791,6 +1829,7 @@ def test_list_transcriptions(responses, mock_elements_worker):
|
|
|
1791
1829
|
"text": "it's",
|
|
1792
1830
|
"confidence": 0.42,
|
|
1793
1831
|
"worker_version_id": "56785678-5678-5678-5678-567856785678",
|
|
1832
|
+
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
1794
1833
|
"element": None,
|
|
1795
1834
|
},
|
|
1796
1835
|
{
|
|
@@ -1798,6 +1837,7 @@ def test_list_transcriptions(responses, mock_elements_worker):
|
|
|
1798
1837
|
"text": "me",
|
|
1799
1838
|
"confidence": 0.42,
|
|
1800
1839
|
"worker_version_id": "56785678-5678-5678-5678-567856785678",
|
|
1840
|
+
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
1801
1841
|
"element": None,
|
|
1802
1842
|
},
|
|
1803
1843
|
]
|
|
@@ -1836,6 +1876,7 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
|
|
|
1836
1876
|
"text": "hey",
|
|
1837
1877
|
"confidence": 0.42,
|
|
1838
1878
|
"worker_version_id": None,
|
|
1879
|
+
"worker_run_id": None,
|
|
1839
1880
|
"element": None,
|
|
1840
1881
|
}
|
|
1841
1882
|
]
|
|
@@ -1850,8 +1891,50 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
|
|
|
1850
1891
|
},
|
|
1851
1892
|
)
|
|
1852
1893
|
|
|
1894
|
+
with pytest.deprecated_call(
|
|
1895
|
+
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
|
|
1896
|
+
):
|
|
1897
|
+
for idx, transcription in enumerate(
|
|
1898
|
+
mock_elements_worker.list_transcriptions(element=elt, worker_version=False)
|
|
1899
|
+
):
|
|
1900
|
+
assert transcription == trans[idx]
|
|
1901
|
+
|
|
1902
|
+
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
1903
|
+
assert [
|
|
1904
|
+
(call.request.method, call.request.url) for call in responses.calls
|
|
1905
|
+
] == BASE_API_CALLS + [
|
|
1906
|
+
(
|
|
1907
|
+
"GET",
|
|
1908
|
+
"http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_version=False",
|
|
1909
|
+
),
|
|
1910
|
+
]
|
|
1911
|
+
|
|
1912
|
+
|
|
1913
|
+
def test_list_transcriptions_manual_worker_run(responses, mock_elements_worker):
|
|
1914
|
+
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
1915
|
+
trans = [
|
|
1916
|
+
{
|
|
1917
|
+
"id": "0000",
|
|
1918
|
+
"text": "hey",
|
|
1919
|
+
"confidence": 0.42,
|
|
1920
|
+
"worker_version_id": None,
|
|
1921
|
+
"worker_run_id": None,
|
|
1922
|
+
"element": None,
|
|
1923
|
+
}
|
|
1924
|
+
]
|
|
1925
|
+
responses.add(
|
|
1926
|
+
responses.GET,
|
|
1927
|
+
"http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_run=False",
|
|
1928
|
+
status=200,
|
|
1929
|
+
json={
|
|
1930
|
+
"count": 1,
|
|
1931
|
+
"next": None,
|
|
1932
|
+
"results": trans,
|
|
1933
|
+
},
|
|
1934
|
+
)
|
|
1935
|
+
|
|
1853
1936
|
for idx, transcription in enumerate(
|
|
1854
|
-
mock_elements_worker.list_transcriptions(element=elt,
|
|
1937
|
+
mock_elements_worker.list_transcriptions(element=elt, worker_run=False)
|
|
1855
1938
|
):
|
|
1856
1939
|
assert transcription == trans[idx]
|
|
1857
1940
|
|
|
@@ -1861,7 +1944,7 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
|
|
|
1861
1944
|
] == BASE_API_CALLS + [
|
|
1862
1945
|
(
|
|
1863
1946
|
"GET",
|
|
1864
|
-
"http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?
|
|
1947
|
+
"http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_run=False",
|
|
1865
1948
|
),
|
|
1866
1949
|
]
|
|
1867
1950
|
|
|
@@ -1895,16 +1978,26 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
|
|
|
1895
1978
|
"66666666-6666-6666-6666-666666666666",
|
|
1896
1979
|
),
|
|
1897
1980
|
),
|
|
1898
|
-
# Filter on element and
|
|
1981
|
+
# Filter on element and worker run should give the first transcription
|
|
1899
1982
|
(
|
|
1900
1983
|
{
|
|
1901
1984
|
"element": CachedElement(
|
|
1902
1985
|
id="11111111-1111-1111-1111-111111111111", type="page"
|
|
1903
1986
|
),
|
|
1904
|
-
"
|
|
1987
|
+
"worker_run": "56785678-5678-5678-5678-567856785678",
|
|
1905
1988
|
},
|
|
1906
1989
|
("11111111-1111-1111-1111-111111111111",),
|
|
1907
1990
|
),
|
|
1991
|
+
# Filter on element, manual worker run should give the sixth transcription
|
|
1992
|
+
(
|
|
1993
|
+
{
|
|
1994
|
+
"element": CachedElement(
|
|
1995
|
+
id="11111111-1111-1111-1111-111111111111", type="page"
|
|
1996
|
+
),
|
|
1997
|
+
"worker_run": False,
|
|
1998
|
+
},
|
|
1999
|
+
("66666666-6666-6666-6666-666666666666",),
|
|
2000
|
+
),
|
|
1908
2001
|
# Filter recursively on element should give all transcriptions inserted
|
|
1909
2002
|
(
|
|
1910
2003
|
{
|
|
@@ -1922,33 +2015,70 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
|
|
|
1922
2015
|
"66666666-6666-6666-6666-666666666666",
|
|
1923
2016
|
),
|
|
1924
2017
|
),
|
|
1925
|
-
# Filter recursively on element and
|
|
2018
|
+
# Filter recursively on element and element_type should give three transcriptions
|
|
1926
2019
|
(
|
|
1927
2020
|
{
|
|
1928
2021
|
"element": CachedElement(
|
|
1929
2022
|
id="11111111-1111-1111-1111-111111111111", type="page"
|
|
1930
2023
|
),
|
|
1931
|
-
"
|
|
2024
|
+
"element_type": "something_else",
|
|
1932
2025
|
"recursive": True,
|
|
1933
2026
|
},
|
|
1934
2027
|
(
|
|
1935
2028
|
"22222222-2222-2222-2222-222222222222",
|
|
1936
|
-
"33333333-3333-3333-3333-333333333333",
|
|
1937
2029
|
"44444444-4444-4444-4444-444444444444",
|
|
1938
2030
|
"55555555-5555-5555-5555-555555555555",
|
|
1939
2031
|
),
|
|
1940
2032
|
),
|
|
1941
|
-
|
|
2033
|
+
],
|
|
2034
|
+
)
|
|
2035
|
+
def test_list_transcriptions_with_cache(
|
|
2036
|
+
responses, mock_elements_worker_with_cache, filters, expected_ids
|
|
2037
|
+
):
|
|
2038
|
+
# Check we have 6 transcriptions already present in database
|
|
2039
|
+
assert CachedTranscription.select().count() == 6
|
|
2040
|
+
|
|
2041
|
+
# Query database through cache
|
|
2042
|
+
transcriptions = mock_elements_worker_with_cache.list_transcriptions(**filters)
|
|
2043
|
+
assert transcriptions.count() == len(expected_ids)
|
|
2044
|
+
for transcription, expected_id in zip(
|
|
2045
|
+
transcriptions.order_by(CachedTranscription.id), expected_ids, strict=True
|
|
2046
|
+
):
|
|
2047
|
+
assert transcription.id == UUID(expected_id)
|
|
2048
|
+
|
|
2049
|
+
# Check the worker never hits the API for transcriptions
|
|
2050
|
+
assert len(responses.calls) == len(BASE_API_CALLS)
|
|
2051
|
+
assert [
|
|
2052
|
+
(call.request.method, call.request.url) for call in responses.calls
|
|
2053
|
+
] == BASE_API_CALLS
|
|
2054
|
+
|
|
2055
|
+
|
|
2056
|
+
@pytest.mark.usefixtures("_mock_cached_transcriptions")
|
|
2057
|
+
@pytest.mark.parametrize(
|
|
2058
|
+
("filters", "expected_ids"),
|
|
2059
|
+
[
|
|
2060
|
+
# Filter on element and worker_version should give first transcription
|
|
1942
2061
|
(
|
|
1943
2062
|
{
|
|
1944
2063
|
"element": CachedElement(
|
|
1945
2064
|
id="11111111-1111-1111-1111-111111111111", type="page"
|
|
1946
2065
|
),
|
|
1947
|
-
"
|
|
2066
|
+
"worker_version": "56785678-5678-5678-5678-567856785678",
|
|
2067
|
+
},
|
|
2068
|
+
("11111111-1111-1111-1111-111111111111",),
|
|
2069
|
+
),
|
|
2070
|
+
# Filter recursively on element and worker_version should give four transcriptions
|
|
2071
|
+
(
|
|
2072
|
+
{
|
|
2073
|
+
"element": CachedElement(
|
|
2074
|
+
id="11111111-1111-1111-1111-111111111111", type="page"
|
|
2075
|
+
),
|
|
2076
|
+
"worker_version": "90129012-9012-9012-9012-901290129012",
|
|
1948
2077
|
"recursive": True,
|
|
1949
2078
|
},
|
|
1950
2079
|
(
|
|
1951
2080
|
"22222222-2222-2222-2222-222222222222",
|
|
2081
|
+
"33333333-3333-3333-3333-333333333333",
|
|
1952
2082
|
"44444444-4444-4444-4444-444444444444",
|
|
1953
2083
|
"55555555-5555-5555-5555-555555555555",
|
|
1954
2084
|
),
|
|
@@ -1965,14 +2095,17 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
|
|
|
1965
2095
|
),
|
|
1966
2096
|
],
|
|
1967
2097
|
)
|
|
1968
|
-
def
|
|
2098
|
+
def test_list_transcriptions_with_cache_deprecation(
|
|
1969
2099
|
responses, mock_elements_worker_with_cache, filters, expected_ids
|
|
1970
2100
|
):
|
|
1971
2101
|
# Check we have 6 transcriptions already present in database
|
|
1972
2102
|
assert CachedTranscription.select().count() == 6
|
|
1973
2103
|
|
|
1974
|
-
|
|
1975
|
-
|
|
2104
|
+
with pytest.deprecated_call(
|
|
2105
|
+
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
|
|
2106
|
+
):
|
|
2107
|
+
# Query database through cache
|
|
2108
|
+
transcriptions = mock_elements_worker_with_cache.list_transcriptions(**filters)
|
|
1976
2109
|
assert transcriptions.count() == len(expected_ids)
|
|
1977
2110
|
for transcription, expected_id in zip(
|
|
1978
2111
|
transcriptions.order_by(CachedTranscription.id), expected_ids, strict=True
|
|
@@ -20,7 +20,8 @@ def test_get_worker_version(fake_dummy_worker):
|
|
|
20
20
|
|
|
21
21
|
api_client.add_response("RetrieveWorkerVersion", response, id=TEST_VERSION_ID)
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
|
|
24
|
+
res = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
|
|
24
25
|
|
|
25
26
|
assert res == response
|
|
26
27
|
assert fake_dummy_worker._worker_version_cache[TEST_VERSION_ID] == response
|
|
@@ -33,8 +34,11 @@ def test_get_worker_version__uses_cache(fake_dummy_worker):
|
|
|
33
34
|
|
|
34
35
|
api_client.add_response("RetrieveWorkerVersion", response, id=TEST_VERSION_ID)
|
|
35
36
|
|
|
36
|
-
|
|
37
|
-
|
|
37
|
+
with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
|
|
38
|
+
response_1 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
|
|
39
|
+
|
|
40
|
+
with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
|
|
41
|
+
response_2 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
|
|
38
42
|
|
|
39
43
|
assert response_1 == response
|
|
40
44
|
assert response_1 == response_2
|
|
@@ -51,12 +55,17 @@ def test_get_worker_version_slug(mocker, fake_dummy_worker):
|
|
|
51
55
|
"worker": {"slug": "mock_slug"},
|
|
52
56
|
}
|
|
53
57
|
|
|
54
|
-
|
|
58
|
+
with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
|
|
59
|
+
slug = fake_dummy_worker.get_worker_version_slug(TEST_VERSION_ID)
|
|
55
60
|
assert slug == "mock_slug"
|
|
56
61
|
|
|
57
62
|
|
|
58
63
|
def test_get_worker_version_slug_none(fake_dummy_worker):
|
|
59
|
-
|
|
64
|
+
# WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
|
|
65
|
+
with (
|
|
66
|
+
pytest.deprecated_call(match="WorkerVersion usage is deprecated."),
|
|
67
|
+
pytest.raises(ValueError, match="No worker version ID"),
|
|
68
|
+
):
|
|
60
69
|
fake_dummy_worker.get_worker_version_slug(None)
|
|
61
70
|
|
|
62
71
|
|
|
@@ -301,7 +310,7 @@ def test_start_activity_error(
|
|
|
301
310
|
),
|
|
302
311
|
]
|
|
303
312
|
assert logger.error.call_args_list == [
|
|
304
|
-
mocker.call("Ran on 1
|
|
313
|
+
mocker.call("Ran on 1 element: 0 completed, 1 failed")
|
|
305
314
|
]
|
|
306
315
|
|
|
307
316
|
|
|
@@ -459,6 +468,10 @@ def test_worker_config_multiple_source(
|
|
|
459
468
|
"id": "12341234-1234-1234-1234-123412341234",
|
|
460
469
|
"name": "Model version 1337",
|
|
461
470
|
"configuration": model_config,
|
|
471
|
+
"model": {
|
|
472
|
+
"id": "hahahaha-haha-haha-haha-hahahahahaha",
|
|
473
|
+
"name": "My model",
|
|
474
|
+
},
|
|
462
475
|
},
|
|
463
476
|
"process": {
|
|
464
477
|
"name": None,
|
tests/test_image.py
CHANGED
|
@@ -7,6 +7,7 @@ from pathlib import Path
|
|
|
7
7
|
|
|
8
8
|
import pytest
|
|
9
9
|
from PIL import Image, ImageChops, ImageOps
|
|
10
|
+
from requests import HTTPError
|
|
10
11
|
|
|
11
12
|
from arkindex_worker.cache import CachedElement, create_tables, init_cache_db
|
|
12
13
|
from arkindex_worker.image import (
|
|
@@ -19,6 +20,7 @@ from arkindex_worker.image import (
|
|
|
19
20
|
polygon_bounding_box,
|
|
20
21
|
revert_orientation,
|
|
21
22
|
trim_polygon,
|
|
23
|
+
upload_image,
|
|
22
24
|
)
|
|
23
25
|
from arkindex_worker.models import Element
|
|
24
26
|
|
|
@@ -547,3 +549,38 @@ def test_download_image_retry_with_max(responses):
|
|
|
547
549
|
assert list(map(attrgetter("request.url"), responses.calls)) == [full_url] * 3 + [
|
|
548
550
|
max_url
|
|
549
551
|
]
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
def test_upload_image_retries(responses):
|
|
555
|
+
dest_url = "https://blabla.com/iiif/2/image_path.jpg/full/full/0/default.jpg"
|
|
556
|
+
responses.add(
|
|
557
|
+
responses.PUT,
|
|
558
|
+
dest_url,
|
|
559
|
+
status=400,
|
|
560
|
+
)
|
|
561
|
+
|
|
562
|
+
image = Image.open(FULL_IMAGE).convert("RGB")
|
|
563
|
+
with pytest.raises(
|
|
564
|
+
HTTPError, match=f"400 Client Error: Bad Request for url: {dest_url}"
|
|
565
|
+
):
|
|
566
|
+
upload_image(image, dest_url)
|
|
567
|
+
|
|
568
|
+
# We try 3 times
|
|
569
|
+
assert len(responses.calls) == 3
|
|
570
|
+
assert list(map(attrgetter("request.url"), responses.calls)) == [dest_url] * 3
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
def test_upload_image(responses):
|
|
574
|
+
dest_url = "https://blabla.com/iiif/2/image_path.jpg/full/full/0/default.jpg"
|
|
575
|
+
responses.add(
|
|
576
|
+
responses.PUT,
|
|
577
|
+
dest_url,
|
|
578
|
+
status=200,
|
|
579
|
+
)
|
|
580
|
+
|
|
581
|
+
image = Image.open(FULL_IMAGE).convert("RGB")
|
|
582
|
+
resp = upload_image(image, dest_url)
|
|
583
|
+
assert resp
|
|
584
|
+
|
|
585
|
+
assert len(responses.calls) == 1
|
|
586
|
+
assert list(map(attrgetter("request.url"), responses.calls)) == [dest_url]
|
tests/test_utils.py
CHANGED
|
@@ -1,11 +1,33 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from arkindex_worker.utils import (
|
|
6
|
+
close_delete_file,
|
|
7
|
+
extract_tar_zst_archive,
|
|
8
|
+
parse_source_id,
|
|
9
|
+
)
|
|
4
10
|
|
|
5
11
|
FIXTURES = Path(__file__).absolute().parent / "data"
|
|
6
12
|
ARCHIVE = FIXTURES / "archive.tar.zst"
|
|
7
13
|
|
|
8
14
|
|
|
15
|
+
@pytest.mark.parametrize(
|
|
16
|
+
("source_id", "expected"),
|
|
17
|
+
[
|
|
18
|
+
(None, None),
|
|
19
|
+
("", None),
|
|
20
|
+
(
|
|
21
|
+
"cafecafe-cafe-cafe-cafe-cafecafecafe",
|
|
22
|
+
"cafecafe-cafe-cafe-cafe-cafecafecafe",
|
|
23
|
+
),
|
|
24
|
+
("manual", False),
|
|
25
|
+
],
|
|
26
|
+
)
|
|
27
|
+
def test_parse_source_id(source_id, expected):
|
|
28
|
+
assert parse_source_id(source_id) == expected
|
|
29
|
+
|
|
30
|
+
|
|
9
31
|
def test_extract_tar_zst_archive(tmp_path):
|
|
10
32
|
destination = tmp_path / "destination"
|
|
11
33
|
_, archive_path = extract_tar_zst_archive(ARCHIVE, destination)
|
|
File without changes
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from arkindex.mock import MockApiClient
|
|
6
|
+
from arkindex_worker.worker.base import BaseWorker
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@pytest.fixture(autouse=True)
|
|
10
|
+
def _setup_environment(responses, monkeypatch) -> None:
|
|
11
|
+
"""Setup needed environment variables"""
|
|
12
|
+
|
|
13
|
+
# Allow accessing remote API schemas
|
|
14
|
+
# defaulting to the prod environment
|
|
15
|
+
schema_url = os.environ.get(
|
|
16
|
+
"ARKINDEX_API_SCHEMA_URL",
|
|
17
|
+
"https://demo.arkindex.org/api/v1/openapi/?format=openapi-json",
|
|
18
|
+
)
|
|
19
|
+
responses.add_passthru(schema_url)
|
|
20
|
+
|
|
21
|
+
# Set schema url in environment
|
|
22
|
+
os.environ["ARKINDEX_API_SCHEMA_URL"] = schema_url
|
|
23
|
+
# Setup a fake worker run ID
|
|
24
|
+
os.environ["ARKINDEX_WORKER_RUN_ID"] = "1234-demo"
|
|
25
|
+
# Setup a fake corpus ID
|
|
26
|
+
os.environ["ARKINDEX_CORPUS_ID"] = "1234-corpus-id"
|
|
27
|
+
|
|
28
|
+
# Setup a mock api client instead of using a real one
|
|
29
|
+
def mock_setup_api_client(self):
|
|
30
|
+
self.api_client = MockApiClient()
|
|
31
|
+
|
|
32
|
+
monkeypatch.setattr(BaseWorker, "setup_api_client", mock_setup_api_client)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_dummy():
|
|
5
|
+
assert True
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_import():
|
|
9
|
+
"""Import our newly created module, through importlib to avoid parsing issues"""
|
|
10
|
+
worker = importlib.import_module("worker_demo.worker")
|
|
11
|
+
assert hasattr(worker, "Demo")
|
|
12
|
+
assert hasattr(worker.Demo, "process_element")
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from logging import Logger, getLogger
|
|
2
|
+
|
|
3
|
+
from arkindex_worker.models import Element
|
|
4
|
+
from arkindex_worker.worker import ElementsWorker
|
|
5
|
+
|
|
6
|
+
logger: Logger = getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Demo(ElementsWorker):
|
|
10
|
+
def process_element(self, element: Element) -> None:
|
|
11
|
+
logger.info(f"Demo processing element ({element.id})")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def main() -> None:
|
|
15
|
+
Demo(description="Demo ML worker for Arkindex").run()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
if __name__ == "__main__":
|
|
19
|
+
main()
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
arkindex_worker/__init__.py,sha256=OlgCtTC9MaWeejviY0a3iQpALcRQGMVArFVVYwTF6I8,162
|
|
2
|
-
arkindex_worker/cache.py,sha256=ZbXJo-O24W8x6nbS0IJm32Tas9CKLaHBBeyQyvF-Nyo,10903
|
|
3
|
-
arkindex_worker/image.py,sha256=uwfUE9hy0Iw-e3vU7OHmLSqouxbznWq08SykXmPD1Cs,14107
|
|
4
|
-
arkindex_worker/models.py,sha256=DVrZPIurSiOoHvj3t_Szwd0j1t6pnwBx_dqwhNakzN0,9528
|
|
5
|
-
arkindex_worker/utils.py,sha256=_lC1-RYvNWXEkK-AuF4_FraoggP1tYPdalNFSj4jDb4,6885
|
|
6
|
-
arkindex_worker/worker/__init__.py,sha256=Iun6jhuakKdCGKjQtgqDWEpWO1HrxK34RoxdzE5gcRs,19322
|
|
7
|
-
arkindex_worker/worker/base.py,sha256=4eG4v4vejvFv9UtTRhxEZkXEBVzlFd3rILHK8lt-mbc,19397
|
|
8
|
-
arkindex_worker/worker/classification.py,sha256=CoMIj7SFFlt90W1r5FQmsB80qK9Zfltcm3k-37FSHA0,10693
|
|
9
|
-
arkindex_worker/worker/dataset.py,sha256=LRZU_KkOuCRkxlkdqw1PHYnu1zmoQfm_OiY8Sqt6mi0,2754
|
|
10
|
-
arkindex_worker/worker/element.py,sha256=6lbJFu4vQrTOJvF4GqlnH3ynYtR-6WB5ljLV6wz7dGg,32283
|
|
11
|
-
arkindex_worker/worker/entity.py,sha256=l0gCoeaoUBFU7pv2iC4pHkSQVjiIur4M15P7Mg_WlaA,13601
|
|
12
|
-
arkindex_worker/worker/metadata.py,sha256=PnzyHkPyb-mtgItzRi4s-_f0dsEOM3ak8F_bFoqp3O0,6225
|
|
13
|
-
arkindex_worker/worker/task.py,sha256=cz3wJNPgogZv1lm_3lm7WScitQtYQtL6H6I7Xokq208,1475
|
|
14
|
-
arkindex_worker/worker/training.py,sha256=rhg4TPFo0ignnCkgbekUYmNXX8u2iZGyeM1VCs1R5kI,10140
|
|
15
|
-
arkindex_worker/worker/transcription.py,sha256=OCjgajaPcX7CPHG1fT4nZccfp6bG6Mqdz3POPc9_iYQ,18765
|
|
16
|
-
arkindex_worker/worker/version.py,sha256=uL-OrwuFZB8TNU6ePmdKIL3g3e-GE2tqHEWBRpXu-FU,1428
|
|
17
|
-
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
-
tests/conftest.py,sha256=9dmc6Lq-XpTZvv1hxfb0rl6zzfcwCtcPMqSBSeEONqc,21847
|
|
19
|
-
tests/test_base_worker.py,sha256=AF1pjvNckN80LVyLJ4ILXJ122fEnWtztK7ZncVDI_Ms,24976
|
|
20
|
-
tests/test_cache.py,sha256=ii0gyr0DrG7ChEs7pmT8hMdSguAOAcCze4bRMiFQxuk,10640
|
|
21
|
-
tests/test_dataset_worker.py,sha256=XxBCLRroCIq97P37_qWc9I7QiyE3zUL7fLAw1J_BI7E,27703
|
|
22
|
-
tests/test_element.py,sha256=2G9M15TLxQRmvrWM9Kw2ucnElh4kSv_oF_5FYwwAxTY,13181
|
|
23
|
-
tests/test_image.py,sha256=nIT0NhmuHtD9I1rcMSvqSfjQqvTE5Ko8tAQGLIkm_zo,15232
|
|
24
|
-
tests/test_merge.py,sha256=Q4zCbtZbe0wBfqE56gvAD06c6pDuhqnjKaioFqIgAQw,8331
|
|
25
|
-
tests/test_utils.py,sha256=pFXegcBvIuy1tJDDSgQtCbC_tRaoLjd2055R5lu3hS0,1236
|
|
26
|
-
tests/test_elements_worker/__init__.py,sha256=Fh4nkbbyJSMv_VtjQxnWrOqTnxXaaWI8S9WU0VrzCHs,179
|
|
27
|
-
tests/test_elements_worker/test_classifications.py,sha256=0_6vryoQc2-s3CQWANsEvajkyC3aub34cxb3r97pRsk,32027
|
|
28
|
-
tests/test_elements_worker/test_cli.py,sha256=DdCRKobesehL61c5QwaZOZCde7bsTlmeSN2iosQ5_2s,2873
|
|
29
|
-
tests/test_elements_worker/test_dataset.py,sha256=ElDPrYTTt8KzaZ_Xf5uslUD6_kiGZybntO10uqCquLo,12063
|
|
30
|
-
tests/test_elements_worker/test_elements.py,sha256=YQpF7y9depXElJcV9yECUGxVI7tF1kkJJ2ruYgXsVWE,79582
|
|
31
|
-
tests/test_elements_worker/test_entities.py,sha256=ZOFB3ckKJvNG2kIPUX_kz_378k3uQrJmvYHpR_xiVuo,33789
|
|
32
|
-
tests/test_elements_worker/test_metadata.py,sha256=c3kXPYRXVPDnGim28Ncg5YO4I0ejh3qyi7dBvbSYxMU,17739
|
|
33
|
-
tests/test_elements_worker/test_task.py,sha256=FCpxE9UpouKXgjGvWgNHEai_Hiy2d1YmqRG-_v2s27s,6312
|
|
34
|
-
tests/test_elements_worker/test_training.py,sha256=WeG-cDuJ-YhPgfKH47TtXBxyargtLuk7c8tsik2WnL8,8414
|
|
35
|
-
tests/test_elements_worker/test_transcriptions.py,sha256=6UWGriQVwEORunJYW11mGcD16voZGFY41i_NIdXuqnI,68750
|
|
36
|
-
tests/test_elements_worker/test_worker.py,sha256=zD8sY5yZFhuUr1txVX8z7bSgW4I2jNuzH5i1TM3qkZI,16491
|
|
37
|
-
arkindex_base_worker-0.3.6rc5.dist-info/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
|
|
38
|
-
arkindex_base_worker-0.3.6rc5.dist-info/METADATA,sha256=E6SjEuSYO53WaxWUvFEPes6Tsap5wYoz87mhdoSeCkE,3413
|
|
39
|
-
arkindex_base_worker-0.3.6rc5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
40
|
-
arkindex_base_worker-0.3.6rc5.dist-info/top_level.txt,sha256=TtagLI8LSv7GE7nG8MQqDFAJ5bNDPJn7Z5vizOgrWkA,22
|
|
41
|
-
arkindex_base_worker-0.3.6rc5.dist-info/RECORD,,
|
|
File without changes
|