arkindex-base-worker 0.3.7rc5__py3-none-any.whl → 0.5.0a1__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (60)
  1. {arkindex_base_worker-0.3.7rc5.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/METADATA +18 -19
  2. arkindex_base_worker-0.5.0a1.dist-info/RECORD +61 -0
  3. {arkindex_base_worker-0.3.7rc5.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/WHEEL +1 -1
  4. {arkindex_base_worker-0.3.7rc5.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/top_level.txt +2 -0
  5. arkindex_worker/cache.py +1 -1
  6. arkindex_worker/image.py +167 -2
  7. arkindex_worker/models.py +18 -0
  8. arkindex_worker/utils.py +98 -4
  9. arkindex_worker/worker/__init__.py +117 -218
  10. arkindex_worker/worker/base.py +39 -46
  11. arkindex_worker/worker/classification.py +34 -18
  12. arkindex_worker/worker/corpus.py +86 -0
  13. arkindex_worker/worker/dataset.py +89 -26
  14. arkindex_worker/worker/element.py +352 -91
  15. arkindex_worker/worker/entity.py +13 -11
  16. arkindex_worker/worker/image.py +21 -0
  17. arkindex_worker/worker/metadata.py +26 -16
  18. arkindex_worker/worker/process.py +92 -0
  19. arkindex_worker/worker/task.py +5 -4
  20. arkindex_worker/worker/training.py +25 -10
  21. arkindex_worker/worker/transcription.py +89 -68
  22. arkindex_worker/worker/version.py +3 -1
  23. hooks/pre_gen_project.py +3 -0
  24. tests/__init__.py +8 -0
  25. tests/conftest.py +47 -58
  26. tests/test_base_worker.py +212 -12
  27. tests/test_dataset_worker.py +294 -437
  28. tests/test_elements_worker/{test_classifications.py → test_classification.py} +216 -100
  29. tests/test_elements_worker/test_cli.py +3 -11
  30. tests/test_elements_worker/test_corpus.py +168 -0
  31. tests/test_elements_worker/test_dataset.py +106 -157
  32. tests/test_elements_worker/test_element.py +427 -0
  33. tests/test_elements_worker/test_element_create_multiple.py +715 -0
  34. tests/test_elements_worker/test_element_create_single.py +528 -0
  35. tests/test_elements_worker/test_element_list_children.py +969 -0
  36. tests/test_elements_worker/test_element_list_parents.py +530 -0
  37. tests/test_elements_worker/{test_entities.py → test_entity_create.py} +37 -195
  38. tests/test_elements_worker/test_entity_list_and_check.py +160 -0
  39. tests/test_elements_worker/test_image.py +66 -0
  40. tests/test_elements_worker/test_metadata.py +252 -161
  41. tests/test_elements_worker/test_process.py +89 -0
  42. tests/test_elements_worker/test_task.py +8 -18
  43. tests/test_elements_worker/test_training.py +17 -8
  44. tests/test_elements_worker/test_transcription_create.py +873 -0
  45. tests/test_elements_worker/test_transcription_create_with_elements.py +951 -0
  46. tests/test_elements_worker/test_transcription_list.py +450 -0
  47. tests/test_elements_worker/test_version.py +60 -0
  48. tests/test_elements_worker/test_worker.py +578 -293
  49. tests/test_image.py +542 -209
  50. tests/test_merge.py +1 -2
  51. tests/test_utils.py +89 -4
  52. worker-demo/tests/__init__.py +0 -0
  53. worker-demo/tests/conftest.py +32 -0
  54. worker-demo/tests/test_worker.py +12 -0
  55. worker-demo/worker_demo/__init__.py +6 -0
  56. worker-demo/worker_demo/worker.py +19 -0
  57. arkindex_base_worker-0.3.7rc5.dist-info/RECORD +0 -41
  58. tests/test_elements_worker/test_elements.py +0 -2713
  59. tests/test_elements_worker/test_transcriptions.py +0 -2119
  60. {arkindex_base_worker-0.3.7rc5.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/LICENSE +0 -0
tests/test_merge.py CHANGED
@@ -161,7 +161,7 @@ def test_merge_from_worker(
161
161
  """
162
162
  responses.add(
163
163
  responses.GET,
164
- "http://testserver/api/v1/task/my_task/from-agent/",
164
+ "http://testserver/api/v1/task/my_task/",
165
165
  status=200,
166
166
  json={"parents": ["first", "second"]},
167
167
  )
@@ -181,7 +181,6 @@ def test_merge_from_worker(
181
181
  (tmp_path / "my_task").mkdir()
182
182
  mock_base_worker_with_cache.args = mock_base_worker_with_cache.parser.parse_args()
183
183
  mock_base_worker_with_cache.configure()
184
- mock_base_worker_with_cache.configure_cache()
185
184
  # Store parent tasks IDs as attribute
186
185
  assert mock_base_worker_with_cache.task_parents == ["first", "second"]
187
186
 
tests/test_utils.py CHANGED
@@ -1,9 +1,34 @@
1
- from pathlib import Path
1
+ import logging
2
2
 
3
- from arkindex_worker.utils import close_delete_file, extract_tar_zst_archive
3
+ import pytest
4
4
 
5
- FIXTURES = Path(__file__).absolute().parent / "data"
6
- ARCHIVE = FIXTURES / "archive.tar.zst"
5
+ from arkindex_worker.cache import unsupported_cache
6
+ from arkindex_worker.utils import (
7
+ DEFAULT_BATCH_SIZE,
8
+ batch_publication,
9
+ close_delete_file,
10
+ extract_tar_zst_archive,
11
+ parse_source_id,
12
+ )
13
+ from tests import FIXTURES_DIR
14
+
15
+ ARCHIVE = FIXTURES_DIR / "archive.tar.zst"
16
+
17
+
18
+ @pytest.mark.parametrize(
19
+ ("source_id", "expected"),
20
+ [
21
+ (None, None),
22
+ ("", None),
23
+ (
24
+ "cafecafe-cafe-cafe-cafe-cafecafecafe",
25
+ "cafecafe-cafe-cafe-cafe-cafecafecafe",
26
+ ),
27
+ ("manual", False),
28
+ ],
29
+ )
30
+ def test_parse_source_id(source_id, expected):
31
+ assert parse_source_id(source_id) == expected
7
32
 
8
33
 
9
34
  def test_extract_tar_zst_archive(tmp_path):
@@ -33,3 +58,63 @@ def test_close_delete_file(tmp_path):
33
58
  close_delete_file(archive_fd, archive_path)
34
59
 
35
60
  assert not archive_path.exists()
61
+
62
+
63
+ class TestMixin:
64
+ def __init__(self, use_cache: bool = False):
65
+ """
66
+ Args:
67
+ use_cache (bool, optional): To mock BaseWorker.use_cache attribute. Defaults to False.
68
+ """
69
+ self.use_cache = use_cache
70
+
71
+ @batch_publication
72
+ def custom_publication_in_batches(self, batch_size: int = DEFAULT_BATCH_SIZE):
73
+ return batch_size
74
+
75
+ @unsupported_cache
76
+ @batch_publication
77
+ def custom_publication_in_batches_without_cache(
78
+ self, batch_size: int = DEFAULT_BATCH_SIZE
79
+ ):
80
+ return batch_size
81
+
82
+
83
+ def test_batch_publication_decorator_no_parameter():
84
+ assert TestMixin().custom_publication_in_batches() == DEFAULT_BATCH_SIZE
85
+
86
+
87
+ @pytest.mark.parametrize("wrong_batch_size", [None, "not an int", 0])
88
+ def test_batch_publication_decorator_wrong_parameter(wrong_batch_size):
89
+ with pytest.raises(
90
+ AssertionError,
91
+ match="batch_size shouldn't be null and should be a strictly positive integer",
92
+ ):
93
+ TestMixin().custom_publication_in_batches(batch_size=wrong_batch_size)
94
+
95
+
96
+ @pytest.mark.parametrize("batch_size", [1, 10, DEFAULT_BATCH_SIZE])
97
+ def test_batch_publication_decorator_right_parameter(batch_size):
98
+ assert (
99
+ TestMixin().custom_publication_in_batches(batch_size=batch_size) == batch_size
100
+ )
101
+
102
+
103
+ def test_batch_publication_decorator_alongside_unsupported_cache(caplog):
104
+ # Capture log messages
105
+ caplog.clear()
106
+ with caplog.at_level(logging.WARNING):
107
+ # Call the helper
108
+ assert (
109
+ TestMixin(use_cache=True).custom_publication_in_batches_without_cache()
110
+ == DEFAULT_BATCH_SIZE
111
+ )
112
+
113
+ # Check logs
114
+ assert caplog.record_tuples == [
115
+ (
116
+ "arkindex_worker",
117
+ logging.WARNING,
118
+ "This API helper `custom_publication_in_batches_without_cache` did not update the cache database",
119
+ ),
120
+ ]
worker-demo/tests/__init__.py ADDED
File without changes
worker-demo/tests/conftest.py ADDED
@@ -0,0 +1,32 @@
1
+ import os
2
+
3
+ import pytest
4
+
5
+ from arkindex.mock import MockApiClient
6
+ from arkindex_worker.worker.base import BaseWorker
7
+
8
+
9
+ @pytest.fixture(autouse=True)
10
+ def _setup_environment(responses, monkeypatch) -> None:
11
+ """Setup needed environment variables"""
12
+
13
+ # Allow accessing remote API schemas
14
+ # defaulting to the prod environment
15
+ schema_url = os.environ.get(
16
+ "ARKINDEX_API_SCHEMA_URL",
17
+ "https://demo.arkindex.org/api/v1/openapi/?format=openapi-json",
18
+ )
19
+ responses.add_passthru(schema_url)
20
+
21
+ # Set schema url in environment
22
+ os.environ["ARKINDEX_API_SCHEMA_URL"] = schema_url
23
+ # Setup a fake worker run ID
24
+ os.environ["ARKINDEX_WORKER_RUN_ID"] = "1234-demo"
25
+ # Setup a fake corpus ID
26
+ os.environ["ARKINDEX_CORPUS_ID"] = "1234-corpus-id"
27
+
28
+ # Setup a mock api client instead of using a real one
29
+ def mock_setup_api_client(self):
30
+ self.api_client = MockApiClient()
31
+
32
+ monkeypatch.setattr(BaseWorker, "setup_api_client", mock_setup_api_client)
worker-demo/tests/test_worker.py ADDED
@@ -0,0 +1,12 @@
1
+ import importlib
2
+
3
+
4
+ def test_dummy():
5
+ assert True
6
+
7
+
8
+ def test_import():
9
+ """Import our newly created module, through importlib to avoid parsing issues"""
10
+ worker = importlib.import_module("worker_demo.worker")
11
+ assert hasattr(worker, "Demo")
12
+ assert hasattr(worker.Demo, "process_element")
worker-demo/worker_demo/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ import logging
2
+
3
+ logging.basicConfig(
4
+ level=logging.INFO,
5
+ format="%(asctime)s %(levelname)s/%(name)s: %(message)s",
6
+ )
worker-demo/worker_demo/worker.py ADDED
@@ -0,0 +1,19 @@
1
+ from logging import Logger, getLogger
2
+
3
+ from arkindex_worker.models import Element
4
+ from arkindex_worker.worker import ElementsWorker
5
+
6
+ logger: Logger = getLogger(__name__)
7
+
8
+
9
+ class Demo(ElementsWorker):
10
+ def process_element(self, element: Element) -> None:
11
+ logger.info(f"Demo processing element ({element.id})")
12
+
13
+
14
+ def main() -> None:
15
+ Demo(description="Demo ML worker for Arkindex").run()
16
+
17
+
18
+ if __name__ == "__main__":
19
+ main()
arkindex_base_worker-0.3.7rc5.dist-info/RECORD REMOVED
@@ -1,41 +0,0 @@
1
- arkindex_worker/__init__.py,sha256=OlgCtTC9MaWeejviY0a3iQpALcRQGMVArFVVYwTF6I8,162
2
- arkindex_worker/cache.py,sha256=FTlB0coXofn5zTNRTcVIvh709mcw4a1bPGqkwWjKs3w,11248
3
- arkindex_worker/image.py,sha256=9-k_Wojk-sLbgvBSi7tWiiDc9YApWauJpHGKRay_nmo,14166
4
- arkindex_worker/models.py,sha256=HdKFw3qk4WIWC-DrHDkhsw0mHP3OILuCLFf7aTjruZU,9526
5
- arkindex_worker/utils.py,sha256=VSO8c21nsSaUCkyJaFX8wOwDQ0tztLOBFtiGvqlT0zU,6900
6
- arkindex_worker/worker/__init__.py,sha256=I8QmdAs659SalxNjtCu2K2ItdyUlXYm3mK_WhZdjgBs,19498
7
- arkindex_worker/worker/base.py,sha256=7ii3rZai6IB0-eB0TJ6pg-IhxMmW4izoJAKJKczbyZ4,19934
8
- arkindex_worker/worker/classification.py,sha256=JVz-6YEeuavOy7zGfQi4nE_wpj9hwMUZDXTem-hXQY8,10328
9
- arkindex_worker/worker/dataset.py,sha256=qzjaXJtfeNCP2acsHbqp5tjQk-KpLHwVzjDAExeAmVg,3228
10
- arkindex_worker/worker/element.py,sha256=AWK3YJSHWy3j4ajntJloi_2X4zxsgXZ6c6dzphgq3OI,33848
11
- arkindex_worker/worker/entity.py,sha256=YT2Ttdn-L5TRoDdhOI3Z4GE1vtkWl7tKZqbYrtxZ2Ug,14630
12
- arkindex_worker/worker/metadata.py,sha256=SC6apVaOjFrmYw5b-njhqIlH-_r0ExbNpZeQZzlUjBE,6669
13
- arkindex_worker/worker/task.py,sha256=cz3wJNPgogZv1lm_3lm7WScitQtYQtL6H6I7Xokq208,1475
14
- arkindex_worker/worker/training.py,sha256=SOs3YKGikTr3rdWYp9H-jbtgRnZxQAoqtwB26ztx9j8,10235
15
- arkindex_worker/worker/transcription.py,sha256=6R7ofcGnNqX4rjT0kRKIE-G9FHq2TJ1tfztNM5sTqYE,20464
16
- arkindex_worker/worker/version.py,sha256=cs2pdlDxpKRO2Oldvcu54w-D_DQhf1cdeEt4tKX_QYs,1927
17
- tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- tests/conftest.py,sha256=wzKXRnS7OKQCNHrlDaQhMC8EXlsQTY_S4L9U_hXbjpM,22004
19
- tests/test_base_worker.py,sha256=Uq6_MpLW23gmKFXkU-SyDUaA_4dlViLBGG4e3gpBBz0,24512
20
- tests/test_cache.py,sha256=ii0gyr0DrG7ChEs7pmT8hMdSguAOAcCze4bRMiFQxuk,10640
21
- tests/test_dataset_worker.py,sha256=Q-3gVu2FNa6mJVkUW-PUVgyUAvRkxSzLCJrPhwoJlxQ,28273
22
- tests/test_element.py,sha256=2G9M15TLxQRmvrWM9Kw2ucnElh4kSv_oF_5FYwwAxTY,13181
23
- tests/test_image.py,sha256=FZv8njLxh45sVgmY71UFHt0lv1cHr0cK4rrtPhQleX8,16262
24
- tests/test_merge.py,sha256=Q4zCbtZbe0wBfqE56gvAD06c6pDuhqnjKaioFqIgAQw,8331
25
- tests/test_utils.py,sha256=pFXegcBvIuy1tJDDSgQtCbC_tRaoLjd2055R5lu3hS0,1236
26
- tests/test_elements_worker/__init__.py,sha256=Fh4nkbbyJSMv_VtjQxnWrOqTnxXaaWI8S9WU0VrzCHs,179
27
- tests/test_elements_worker/test_classifications.py,sha256=vU6al1THtDSmERyVscMXaqiRPwTllcpRUHyeyBQ8M9U,26417
28
- tests/test_elements_worker/test_cli.py,sha256=BsFTswLti63WAZ2pf6ipiZKWJJyCQuSfuKnSlESuK8g,2878
29
- tests/test_elements_worker/test_dataset.py,sha256=-kVll1NcMPWkIx8D7r-Z5neEGkFiZ9YQfC4eTMIfjg0,13475
30
- tests/test_elements_worker/test_elements.py,sha256=6XKtgXSVQJnTSgTHWwEVsAtIwLBapjYjUYPUdjxcHsY,84971
31
- tests/test_elements_worker/test_entities.py,sha256=yi1mXzvKvNwUNMzo0UZ56YOIJstYHcLyeepPJ8f10MQ,34557
32
- tests/test_elements_worker/test_metadata.py,sha256=b9CNv4W31TRJqYauvX_pRIN2SvnybaLqF-FWoFwa2Vc,18672
33
- tests/test_elements_worker/test_task.py,sha256=FCpxE9UpouKXgjGvWgNHEai_Hiy2d1YmqRG-_v2s27s,6312
34
- tests/test_elements_worker/test_training.py,sha256=WeG-cDuJ-YhPgfKH47TtXBxyargtLuk7c8tsik2WnL8,8414
35
- tests/test_elements_worker/test_transcriptions.py,sha256=WVJG26sZyY66fu-Eka9A1_WWIeNI2scogjypzURnp8A,73468
36
- tests/test_elements_worker/test_worker.py,sha256=7-jGJVT3yMGpIyN96Uafz5eIUrO4ieNLgw0k1D8BhGc,17163
37
- arkindex_base_worker-0.3.7rc5.dist-info/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
38
- arkindex_base_worker-0.3.7rc5.dist-info/METADATA,sha256=5i7tDRQVCiM4oo8mc0F8X_wsdSvr-mlqjmjhHHAmbNc,3411
39
- arkindex_base_worker-0.3.7rc5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
40
- arkindex_base_worker-0.3.7rc5.dist-info/top_level.txt,sha256=TtagLI8LSv7GE7nG8MQqDFAJ5bNDPJn7Z5vizOgrWkA,22
41
- arkindex_base_worker-0.3.7rc5.dist-info/RECORD,,