arkindex-base-worker 0.4.0rc6__py3-none-any.whl → 0.5.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.4.0rc6.dist-info → arkindex_base_worker-0.5.0a2.dist-info}/METADATA +3 -3
- {arkindex_base_worker-0.4.0rc6.dist-info → arkindex_base_worker-0.5.0a2.dist-info}/RECORD +27 -27
- {arkindex_base_worker-0.4.0rc6.dist-info → arkindex_base_worker-0.5.0a2.dist-info}/WHEEL +1 -1
- arkindex_worker/__init__.py +3 -0
- arkindex_worker/cache.py +3 -3
- arkindex_worker/image.py +98 -48
- arkindex_worker/utils.py +2 -1
- arkindex_worker/worker/__init__.py +17 -17
- arkindex_worker/worker/base.py +6 -6
- arkindex_worker/worker/classification.py +34 -32
- arkindex_worker/worker/corpus.py +3 -3
- arkindex_worker/worker/dataset.py +9 -9
- arkindex_worker/worker/element.py +193 -189
- arkindex_worker/worker/entity.py +62 -63
- arkindex_worker/worker/image.py +3 -3
- arkindex_worker/worker/metadata.py +27 -27
- arkindex_worker/worker/task.py +9 -9
- arkindex_worker/worker/training.py +15 -11
- arkindex_worker/worker/transcription.py +77 -71
- tests/conftest.py +22 -22
- tests/test_dataset_worker.py +1 -1
- tests/test_elements_worker/test_training.py +8 -8
- tests/test_elements_worker/test_worker.py +15 -14
- tests/test_image.py +234 -124
- tests/test_utils.py +37 -0
- {arkindex_base_worker-0.4.0rc6.dist-info → arkindex_base_worker-0.5.0a2.dist-info}/LICENSE +0 -0
- {arkindex_base_worker-0.4.0rc6.dist-info → arkindex_base_worker-0.5.0a2.dist-info}/top_level.txt +0 -0
tests/conftest.py
CHANGED
|
@@ -46,7 +46,7 @@ def _disable_sleep(monkeypatch):
|
|
|
46
46
|
monkeypatch.setattr(time, "sleep", lambda x: None)
|
|
47
47
|
|
|
48
48
|
|
|
49
|
-
@pytest.fixture()
|
|
49
|
+
@pytest.fixture
|
|
50
50
|
def _cache_yaml(monkeypatch):
|
|
51
51
|
"""
|
|
52
52
|
Cache all calls to yaml.safe_load in order to speedup
|
|
@@ -111,7 +111,7 @@ def _give_env_variable(monkeypatch):
|
|
|
111
111
|
monkeypatch.setenv("ARKINDEX_WORKER_RUN_ID", "56785678-5678-5678-5678-567856785678")
|
|
112
112
|
|
|
113
113
|
|
|
114
|
-
@pytest.fixture()
|
|
114
|
+
@pytest.fixture
|
|
115
115
|
def _mock_worker_run_api(responses):
|
|
116
116
|
"""Provide a mock API response to get worker run information"""
|
|
117
117
|
payload = {
|
|
@@ -180,7 +180,7 @@ def _mock_worker_run_api(responses):
|
|
|
180
180
|
)
|
|
181
181
|
|
|
182
182
|
|
|
183
|
-
@pytest.fixture()
|
|
183
|
+
@pytest.fixture
|
|
184
184
|
def _mock_worker_run_no_revision_api(responses):
|
|
185
185
|
"""Provide a mock API response to get worker run not linked to a revision information"""
|
|
186
186
|
payload = {
|
|
@@ -247,7 +247,7 @@ def _mock_worker_run_no_revision_api(responses):
|
|
|
247
247
|
)
|
|
248
248
|
|
|
249
249
|
|
|
250
|
-
@pytest.fixture()
|
|
250
|
+
@pytest.fixture
|
|
251
251
|
def _mock_activity_calls(responses):
|
|
252
252
|
"""
|
|
253
253
|
Mock responses when updating the activity state for multiple element of the same version
|
|
@@ -259,7 +259,7 @@ def _mock_activity_calls(responses):
|
|
|
259
259
|
)
|
|
260
260
|
|
|
261
261
|
|
|
262
|
-
@pytest.fixture()
|
|
262
|
+
@pytest.fixture
|
|
263
263
|
def mock_elements_worker(monkeypatch, _mock_worker_run_api):
|
|
264
264
|
"""Build and configure an ElementsWorker with fixed CLI parameters to avoid issues with pytest"""
|
|
265
265
|
monkeypatch.setattr(sys, "argv", ["worker"])
|
|
@@ -268,7 +268,7 @@ def mock_elements_worker(monkeypatch, _mock_worker_run_api):
|
|
|
268
268
|
return worker
|
|
269
269
|
|
|
270
270
|
|
|
271
|
-
@pytest.fixture()
|
|
271
|
+
@pytest.fixture
|
|
272
272
|
def mock_elements_worker_read_only(monkeypatch):
|
|
273
273
|
"""Build and configure an ElementsWorker with fixed CLI parameters to avoid issues with pytest"""
|
|
274
274
|
monkeypatch.setattr(sys, "argv", ["worker", "--dev"])
|
|
@@ -277,7 +277,7 @@ def mock_elements_worker_read_only(monkeypatch):
|
|
|
277
277
|
return worker
|
|
278
278
|
|
|
279
279
|
|
|
280
|
-
@pytest.fixture()
|
|
280
|
+
@pytest.fixture
|
|
281
281
|
def mock_elements_worker_with_list(monkeypatch, responses, mock_elements_worker):
|
|
282
282
|
"""
|
|
283
283
|
Mock a worker instance to list and retrieve a single element
|
|
@@ -296,7 +296,7 @@ def mock_elements_worker_with_list(monkeypatch, responses, mock_elements_worker)
|
|
|
296
296
|
return mock_elements_worker
|
|
297
297
|
|
|
298
298
|
|
|
299
|
-
@pytest.fixture()
|
|
299
|
+
@pytest.fixture
|
|
300
300
|
def mock_cache_db(tmp_path):
|
|
301
301
|
cache_path = tmp_path / "db.sqlite"
|
|
302
302
|
|
|
@@ -307,7 +307,7 @@ def mock_cache_db(tmp_path):
|
|
|
307
307
|
return cache_path
|
|
308
308
|
|
|
309
309
|
|
|
310
|
-
@pytest.fixture()
|
|
310
|
+
@pytest.fixture
|
|
311
311
|
def mock_base_worker_with_cache(monkeypatch, _mock_worker_run_api):
|
|
312
312
|
"""Build a BaseWorker using SQLite cache, also mocking a PONOS_TASK"""
|
|
313
313
|
monkeypatch.setattr(sys, "argv", ["worker"])
|
|
@@ -318,7 +318,7 @@ def mock_base_worker_with_cache(monkeypatch, _mock_worker_run_api):
|
|
|
318
318
|
return worker
|
|
319
319
|
|
|
320
320
|
|
|
321
|
-
@pytest.fixture()
|
|
321
|
+
@pytest.fixture
|
|
322
322
|
def mock_elements_worker_with_cache(monkeypatch, mock_cache_db, _mock_worker_run_api):
|
|
323
323
|
"""Build and configure an ElementsWorker using SQLite cache with fixed CLI parameters to avoid issues with pytest"""
|
|
324
324
|
monkeypatch.setattr(sys, "argv", ["worker", "-d", str(mock_cache_db)])
|
|
@@ -329,17 +329,17 @@ def mock_elements_worker_with_cache(monkeypatch, mock_cache_db, _mock_worker_run
|
|
|
329
329
|
return worker
|
|
330
330
|
|
|
331
331
|
|
|
332
|
-
@pytest.fixture()
|
|
332
|
+
@pytest.fixture
|
|
333
333
|
def model_file_dir():
|
|
334
334
|
return SAMPLES_DIR / "model_files"
|
|
335
335
|
|
|
336
336
|
|
|
337
|
-
@pytest.fixture()
|
|
337
|
+
@pytest.fixture
|
|
338
338
|
def model_file_dir_with_subfolder():
|
|
339
339
|
return SAMPLES_DIR / "root_folder"
|
|
340
340
|
|
|
341
341
|
|
|
342
|
-
@pytest.fixture()
|
|
342
|
+
@pytest.fixture
|
|
343
343
|
def fake_dummy_worker():
|
|
344
344
|
api_client = MockApiClient()
|
|
345
345
|
worker = ElementsWorker()
|
|
@@ -347,7 +347,7 @@ def fake_dummy_worker():
|
|
|
347
347
|
return worker
|
|
348
348
|
|
|
349
349
|
|
|
350
|
-
@pytest.fixture()
|
|
350
|
+
@pytest.fixture
|
|
351
351
|
def _mock_cached_elements(mock_cache_db):
|
|
352
352
|
"""Insert few elements in local cache"""
|
|
353
353
|
CachedElement.create(
|
|
@@ -392,7 +392,7 @@ def _mock_cached_elements(mock_cache_db):
|
|
|
392
392
|
assert CachedElement.select().count() == 5
|
|
393
393
|
|
|
394
394
|
|
|
395
|
-
@pytest.fixture()
|
|
395
|
+
@pytest.fixture
|
|
396
396
|
def _mock_cached_images(mock_cache_db):
|
|
397
397
|
"""Insert few elements in local cache"""
|
|
398
398
|
CachedImage.create(
|
|
@@ -404,7 +404,7 @@ def _mock_cached_images(mock_cache_db):
|
|
|
404
404
|
assert CachedImage.select().count() == 1
|
|
405
405
|
|
|
406
406
|
|
|
407
|
-
@pytest.fixture()
|
|
407
|
+
@pytest.fixture
|
|
408
408
|
def _mock_cached_transcriptions(mock_cache_db):
|
|
409
409
|
"""Insert few transcriptions in local cache, on a shared element"""
|
|
410
410
|
CachedElement.create(
|
|
@@ -493,7 +493,7 @@ def _mock_cached_transcriptions(mock_cache_db):
|
|
|
493
493
|
)
|
|
494
494
|
|
|
495
495
|
|
|
496
|
-
@pytest.fixture()
|
|
496
|
+
@pytest.fixture
|
|
497
497
|
def mock_databases(tmp_path):
|
|
498
498
|
"""
|
|
499
499
|
Initialize several temporary databases
|
|
@@ -576,7 +576,7 @@ def mock_databases(tmp_path):
|
|
|
576
576
|
return out
|
|
577
577
|
|
|
578
578
|
|
|
579
|
-
@pytest.fixture()
|
|
579
|
+
@pytest.fixture
|
|
580
580
|
def default_dataset():
|
|
581
581
|
return Dataset(
|
|
582
582
|
{
|
|
@@ -594,12 +594,12 @@ def default_dataset():
|
|
|
594
594
|
)
|
|
595
595
|
|
|
596
596
|
|
|
597
|
-
@pytest.fixture()
|
|
597
|
+
@pytest.fixture
|
|
598
598
|
def default_train_set(default_dataset):
|
|
599
599
|
return Set(name="train", dataset=default_dataset)
|
|
600
600
|
|
|
601
601
|
|
|
602
|
-
@pytest.fixture()
|
|
602
|
+
@pytest.fixture
|
|
603
603
|
def mock_dataset_worker(monkeypatch, mocker, _mock_worker_run_api):
|
|
604
604
|
monkeypatch.setenv("PONOS_TASK", "my_task")
|
|
605
605
|
mocker.patch.object(sys, "argv", ["worker"])
|
|
@@ -615,7 +615,7 @@ def mock_dataset_worker(monkeypatch, mocker, _mock_worker_run_api):
|
|
|
615
615
|
return dataset_worker
|
|
616
616
|
|
|
617
617
|
|
|
618
|
-
@pytest.fixture()
|
|
618
|
+
@pytest.fixture
|
|
619
619
|
def mock_dev_dataset_worker(mocker):
|
|
620
620
|
mocker.patch.object(
|
|
621
621
|
sys,
|
|
@@ -640,7 +640,7 @@ def mock_dev_dataset_worker(mocker):
|
|
|
640
640
|
return dataset_worker
|
|
641
641
|
|
|
642
642
|
|
|
643
|
-
@pytest.fixture()
|
|
643
|
+
@pytest.fixture
|
|
644
644
|
def default_artifact():
|
|
645
645
|
return Artifact(
|
|
646
646
|
**{
|
tests/test_dataset_worker.py
CHANGED
tests/test_elements_worker/test_training.py
CHANGED
|
@@ -8,7 +8,7 @@ from arkindex_worker.worker import BaseWorker
|
|
|
8
8
|
from arkindex_worker.worker.training import TrainingMixin, create_archive
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
@pytest.fixture()
|
|
11
|
+
@pytest.fixture
|
|
12
12
|
def mock_training_worker(monkeypatch):
|
|
13
13
|
class TrainingWorker(BaseWorker, TrainingMixin):
|
|
14
14
|
"""
|
|
@@ -22,7 +22,7 @@ def mock_training_worker(monkeypatch):
|
|
|
22
22
|
return training_worker
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
@pytest.fixture()
|
|
25
|
+
@pytest.fixture
|
|
26
26
|
def default_model_version():
|
|
27
27
|
return {
|
|
28
28
|
"id": "model_version_id",
|
|
@@ -51,9 +51,9 @@ def test_create_archive(model_file_dir):
|
|
|
51
51
|
archive_hash,
|
|
52
52
|
):
|
|
53
53
|
assert zst_archive_path.exists(), "The archive was not created"
|
|
54
|
-
assert (
|
|
55
|
-
hash == "c5aedde18a768757351068b840c8c8f9"
|
|
56
|
-
), "Hash was not properly computed"
|
|
54
|
+
assert hash == "c5aedde18a768757351068b840c8c8f9", (
|
|
55
|
+
"Hash was not properly computed"
|
|
56
|
+
)
|
|
57
57
|
assert 300 < size < 700
|
|
58
58
|
|
|
59
59
|
assert not zst_archive_path.exists(), "Auto removal failed"
|
|
@@ -69,9 +69,9 @@ def test_create_archive_with_subfolder(model_file_dir_with_subfolder):
|
|
|
69
69
|
archive_hash,
|
|
70
70
|
):
|
|
71
71
|
assert zst_archive_path.exists(), "The archive was not created"
|
|
72
|
-
assert (
|
|
73
|
-
hash == "3e453881404689e6e125144d2db3e605"
|
|
74
|
-
), "Hash was not properly computed"
|
|
72
|
+
assert hash == "3e453881404689e6e125144d2db3e605", (
|
|
73
|
+
"Hash was not properly computed"
|
|
74
|
+
)
|
|
75
75
|
assert 300 < size < 1500
|
|
76
76
|
|
|
77
77
|
assert not zst_archive_path.exists(), "Auto removal failed"
|
|
tests/test_elements_worker/test_worker.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import logging
|
|
2
3
|
import sys
|
|
3
4
|
from argparse import Namespace
|
|
4
5
|
from uuid import UUID
|
|
@@ -685,7 +686,7 @@ def test_run_cache(monkeypatch, mocker, mock_elements_worker_with_cache):
|
|
|
685
686
|
|
|
686
687
|
|
|
687
688
|
def test_start_activity_conflict(
|
|
688
|
-
monkeypatch, responses,
|
|
689
|
+
monkeypatch, responses, mock_elements_worker_with_list, caplog
|
|
689
690
|
):
|
|
690
691
|
# Disable second configure call from run()
|
|
691
692
|
monkeypatch.setattr(mock_elements_worker_with_list, "configure", lambda: None)
|
|
@@ -700,9 +701,6 @@ def test_start_activity_conflict(
|
|
|
700
701
|
content="Either this activity does not exists or this state is not allowed.",
|
|
701
702
|
),
|
|
702
703
|
)
|
|
703
|
-
from arkindex_worker.worker import logger
|
|
704
|
-
|
|
705
|
-
logger.info = mocker.MagicMock()
|
|
706
704
|
|
|
707
705
|
mock_elements_worker_with_list.run()
|
|
708
706
|
|
|
@@ -716,14 +714,15 @@ def test_start_activity_conflict(
|
|
|
716
714
|
"http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
|
|
717
715
|
),
|
|
718
716
|
]
|
|
719
|
-
assert
|
|
720
|
-
|
|
721
|
-
|
|
717
|
+
assert [(record.levelno, record.message) for record in caplog.records] == [
|
|
718
|
+
(logging.INFO, "Processing page Test Page n°1 (1234-deadbeef) (1/1)"),
|
|
719
|
+
(logging.INFO, "Skipping element 1234-deadbeef as it was already processed"),
|
|
720
|
+
(logging.INFO, "Ran on 1 element: 1 completed, 0 failed"),
|
|
722
721
|
]
|
|
723
722
|
|
|
724
723
|
|
|
725
724
|
def test_start_activity_error(
|
|
726
|
-
monkeypatch, responses,
|
|
725
|
+
monkeypatch, responses, mock_elements_worker_with_list, caplog
|
|
727
726
|
):
|
|
728
727
|
# Disable second configure call from run()
|
|
729
728
|
monkeypatch.setattr(mock_elements_worker_with_list, "configure", lambda: None)
|
|
@@ -732,11 +731,8 @@ def test_start_activity_error(
|
|
|
732
731
|
responses.add(
|
|
733
732
|
responses.PUT,
|
|
734
733
|
"http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
|
|
735
|
-
body=Exception("A wild Petilil appears
|
|
734
|
+
body=Exception("A wild Petilil appears!"),
|
|
736
735
|
)
|
|
737
|
-
from arkindex_worker.worker import logger
|
|
738
|
-
|
|
739
|
-
logger.error = mocker.MagicMock()
|
|
740
736
|
|
|
741
737
|
with pytest.raises(SystemExit):
|
|
742
738
|
mock_elements_worker_with_list.run()
|
|
@@ -755,8 +751,13 @@ def test_start_activity_error(
|
|
|
755
751
|
"http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
|
|
756
752
|
),
|
|
757
753
|
]
|
|
758
|
-
assert
|
|
759
|
-
|
|
754
|
+
assert [(record.levelno, record.message) for record in caplog.records] == [
|
|
755
|
+
(logging.INFO, "Processing page Test Page n°1 (1234-deadbeef) (1/1)"),
|
|
756
|
+
(
|
|
757
|
+
logging.WARNING,
|
|
758
|
+
"Failed running worker on element 1234-deadbeef: Exception('A wild Petilil appears!')",
|
|
759
|
+
),
|
|
760
|
+
(logging.ERROR, "Ran on 1 element: 0 completed, 1 failed"),
|
|
760
761
|
]
|
|
761
762
|
|
|
762
763
|
|