PyPI - arkindex-base-worker - Versions diffs - 0.3.5rc6__py3-none-any.whl → 0.3.6rc2__py3-none-any.whl - Mend

arkindex-base-worker 0.3.5rc6__py3-none-any.whl → 0.3.6rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

arkindex_base_worker-0.3.6rc2.dist-info/METADATA +39 -0
arkindex_base_worker-0.3.6rc2.dist-info/RECORD +40 -0
arkindex_worker/__init__.py +0 -1
arkindex_worker/cache.py +19 -25
arkindex_worker/image.py +16 -17
arkindex_worker/models.py +24 -21
arkindex_worker/utils.py +18 -19
arkindex_worker/worker/__init__.py +17 -27
arkindex_worker/worker/base.py +12 -7
arkindex_worker/worker/classification.py +13 -15
arkindex_worker/worker/dataset.py +3 -4
arkindex_worker/worker/element.py +80 -76
arkindex_worker/worker/entity.py +28 -30
arkindex_worker/worker/metadata.py +21 -27
arkindex_worker/worker/task.py +2 -3
arkindex_worker/worker/training.py +25 -26
arkindex_worker/worker/transcription.py +37 -34
arkindex_worker/worker/version.py +1 -2
tests/conftest.py +56 -76
tests/test_base_worker.py +38 -32
tests/test_cache.py +14 -7
tests/test_dataset_worker.py +25 -22
tests/test_element.py +0 -1
tests/test_elements_worker/__init__.py +0 -1
tests/test_elements_worker/test_classifications.py +0 -1
tests/test_elements_worker/test_cli.py +22 -17
tests/test_elements_worker/test_dataset.py +9 -10
tests/test_elements_worker/test_elements.py +58 -63
tests/test_elements_worker/test_entities.py +10 -20
tests/test_elements_worker/test_metadata.py +72 -96
tests/test_elements_worker/test_task.py +22 -20
tests/test_elements_worker/test_training.py +20 -13
tests/test_elements_worker/test_transcriptions.py +6 -10
tests/test_elements_worker/test_worker.py +16 -14
tests/test_image.py +21 -20
tests/test_merge.py +5 -6
tests/test_utils.py +0 -1
arkindex_base_worker-0.3.5rc6.dist-info/METADATA +0 -27
arkindex_base_worker-0.3.5rc6.dist-info/RECORD +0 -42
arkindex_worker/git.py +0 -392
tests/test_git.py +0 -480
{arkindex_base_worker-0.3.5rc6.dist-info → arkindex_base_worker-0.3.6rc2.dist-info}/WHEEL +0 -0
{arkindex_base_worker-0.3.5rc6.dist-info → arkindex_base_worker-0.3.6rc2.dist-info}/top_level.txt +0 -0

tests/conftest.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import hashlib
 import json
 import os
@@ -19,10 +18,10 @@ from arkindex_worker.cache import (
     CachedImage,
     CachedTranscription,
     Version,
+    create_tables,
     create_version_table,
     init_cache_db,
 )
-from arkindex_worker.git import GitHelper, GitlabHelper
 from arkindex_worker.models import Artifact, Dataset
 from arkindex_worker.worker import BaseWorker, DatasetWorker, ElementsWorker
 from arkindex_worker.worker.dataset import DatasetState
@@ -37,7 +36,7 @@ __yaml_cache = {}
 @pytest.fixture(autouse=True)
-def disable_sleep(monkeypatch):
+def _disable_sleep(monkeypatch):
     """
     Do not sleep at all in between API executions
     when errors occur in unit tests.
@@ -46,8 +45,8 @@ def disable_sleep(monkeypatch):
     monkeypatch.setattr(time, "sleep", lambda x: None)
-@pytest.fixture
-def cache_yaml(monkeypatch):
+@pytest.fixture()
+def _cache_yaml(monkeypatch):
     """
     Cache all calls to yaml.safe_load in order to speedup
     every test cases that load the OpenAPI schema
@@ -75,7 +74,7 @@ def cache_yaml(monkeypatch):
 @pytest.fixture(autouse=True)
-def setup_api(responses, monkeypatch, cache_yaml):
+def _setup_api(responses, monkeypatch, _cache_yaml):
     # Always use the environment variable first
     schema_url = os.environ.get("ARKINDEX_API_SCHEMA_URL")
     if schema_url is None:
@@ -106,13 +105,13 @@ def setup_api(responses, monkeypatch, cache_yaml):
 @pytest.fixture(autouse=True)
-def give_env_variable(request, monkeypatch):
+def _give_env_variable(monkeypatch):
     """Defines required environment variables"""
     monkeypatch.setenv("ARKINDEX_WORKER_RUN_ID", "56785678-5678-5678-5678-567856785678")
-@pytest.fixture
-def mock_worker_run_api(responses):
+@pytest.fixture()
+def _mock_worker_run_api(responses):
     """Provide a mock API response to get worker run information"""
     payload = {
         "id": "56785678-5678-5678-5678-567856785678",
@@ -180,8 +179,8 @@ def mock_worker_run_api(responses):
     )
-@pytest.fixture
-def mock_worker_run_no_revision_api(responses):
+@pytest.fixture()
+def _mock_worker_run_no_revision_api(responses):
     """Provide a mock API response to get worker run not linked to a revision information"""
     payload = {
         "id": "56785678-5678-5678-5678-567856785678",
@@ -247,8 +246,8 @@ def mock_worker_run_no_revision_api(responses):
     )
-@pytest.fixture
-def mock_activity_calls(responses):
+@pytest.fixture()
+def _mock_activity_calls(responses):
     """
     Mock responses when updating the activity state for multiple element of the same version
     """
@@ -259,8 +258,8 @@ def mock_activity_calls(responses):
     )
-@pytest.fixture
-def mock_elements_worker(monkeypatch, mock_worker_run_api):
+@pytest.fixture()
+def mock_elements_worker(monkeypatch, _mock_worker_run_api):
     """Build and configure an ElementsWorker with fixed CLI parameters to avoid issues with pytest"""
     monkeypatch.setattr(sys, "argv", ["worker"])
     worker = ElementsWorker()
@@ -268,7 +267,7 @@ def mock_elements_worker(monkeypatch, mock_worker_run_api):
     return worker
-@pytest.fixture
+@pytest.fixture()
 def mock_elements_worker_read_only(monkeypatch):
     """Build and configure an ElementsWorker with fixed CLI parameters to avoid issues with pytest"""
     monkeypatch.setattr(sys, "argv", ["worker", "--dev"])
@@ -277,7 +276,7 @@ def mock_elements_worker_read_only(monkeypatch):
     return worker
-@pytest.fixture
+@pytest.fixture()
 def mock_elements_worker_with_list(monkeypatch, responses, mock_elements_worker):
     """
     Mock a worker instance to list and retrieve a single element
@@ -298,8 +297,19 @@ def mock_elements_worker_with_list(monkeypatch, responses, mock_elements_worker)
     return mock_elements_worker
-@pytest.fixture
-def mock_base_worker_with_cache(mocker, monkeypatch, mock_worker_run_api):
+@pytest.fixture()
+def mock_cache_db(tmp_path):
+    cache_path = tmp_path / "db.sqlite"
+    init_cache_db(cache_path)
+    create_version_table()
+    create_tables()
+    return cache_path
+@pytest.fixture()
+def mock_base_worker_with_cache(monkeypatch, _mock_worker_run_api):
     """Build a BaseWorker using SQLite cache, also mocking a PONOS_TASK"""
     monkeypatch.setattr(sys, "argv", ["worker"])
@@ -309,13 +319,10 @@ def mock_base_worker_with_cache(mocker, monkeypatch, mock_worker_run_api):
     return worker
-@pytest.fixture
-def mock_elements_worker_with_cache(monkeypatch, mock_worker_run_api, tmp_path):
+@pytest.fixture()
+def mock_elements_worker_with_cache(monkeypatch, mock_cache_db, _mock_worker_run_api):
     """Build and configure an ElementsWorker using SQLite cache with fixed CLI parameters to avoid issues with pytest"""
-    cache_path = tmp_path / "db.sqlite"
-    init_cache_db(cache_path)
-    create_version_table()
-    monkeypatch.setattr(sys, "argv", ["worker", "-d", str(cache_path)])
+    monkeypatch.setattr(sys, "argv", ["worker", "-d", str(mock_cache_db)])
     worker = ElementsWorker(support_cache=True)
     worker.configure()
@@ -323,35 +330,34 @@ def mock_elements_worker_with_cache(monkeypatch, mock_worker_run_api, tmp_path):
     return worker
-@pytest.fixture
+@pytest.fixture()
 def fake_page_element():
-    with open(FIXTURES_DIR / "page_element.json", "r") as f:
-        return json.load(f)
+    return json.loads((FIXTURES_DIR / "page_element.json").read_text())
-@pytest.fixture
+@pytest.fixture()
 def fake_ufcn_worker_version():
-    with open(FIXTURES_DIR / "ufcn_line_historical_worker_version.json", "r") as f:
-        return json.load(f)
+    return json.loads(
+        (FIXTURES_DIR / "ufcn_line_historical_worker_version.json").read_text()
+    )
-@pytest.fixture
+@pytest.fixture()
 def fake_transcriptions_small():
-    with open(FIXTURES_DIR / "line_transcriptions_small.json", "r") as f:
-        return json.load(f)
+    return json.loads((FIXTURES_DIR / "line_transcriptions_small.json").read_text())
-@pytest.fixture
+@pytest.fixture()
 def model_file_dir():
     return SAMPLES_DIR / "model_files"
-@pytest.fixture
+@pytest.fixture()
 def model_file_dir_with_subfolder():
     return SAMPLES_DIR / "root_folder"
-@pytest.fixture
+@pytest.fixture()
 def fake_dummy_worker():
     api_client = MockApiClient()
     worker = ElementsWorker()
@@ -359,34 +365,8 @@ def fake_dummy_worker():
     return worker
-@pytest.fixture
-def fake_git_helper(mocker):
-    gitlab_helper = mocker.MagicMock()
-    return GitHelper(
-        "repo_url",
-        "/tmp/git_test/foo/",
-        "/tmp/test/path/",
-        "tmp_workflow_id",
-        gitlab_helper,
-    )
-@pytest.fixture
-def fake_gitlab_helper_factory():
-    # have to set up the responses, before creating the client
-    def run():
-        return GitlabHelper(
-            "balsac_exporter/balsac-exported-xmls-testing",
-            "https://gitlab.com",
-            "<GITLAB_TOKEN>",
-            "gitlab_branch",
-        )
-    return run
-@pytest.fixture
-def mock_cached_elements():
+@pytest.fixture()
+def _mock_cached_elements(mock_cache_db):
     """Insert few elements in local cache"""
     CachedElement.create(
         id=UUID("99999999-9999-9999-9999-999999999999"),
@@ -430,8 +410,8 @@ def mock_cached_elements():
     assert CachedElement.select().count() == 5
-@pytest.fixture
-def mock_cached_images():
+@pytest.fixture()
+def _mock_cached_images(mock_cache_db):
     """Insert few elements in local cache"""
     CachedImage.create(
         id=UUID("99999999-9999-9999-9999-999999999999"),
@@ -442,8 +422,8 @@ def mock_cached_images():
     assert CachedImage.select().count() == 1
-@pytest.fixture
-def mock_cached_transcriptions():
+@pytest.fixture()
+def _mock_cached_transcriptions(mock_cache_db):
     """Insert few transcriptions in local cache, on a shared element"""
     CachedElement.create(
         id=UUID("11111111-1111-1111-1111-111111111111"),
@@ -529,7 +509,7 @@ def mock_cached_transcriptions():
     )
-@pytest.fixture(scope="function")
+@pytest.fixture()
 def mock_databases(tmp_path):
     """
     Initialize several temporary databases
@@ -612,7 +592,7 @@ def mock_databases(tmp_path):
     return out
-@pytest.fixture
+@pytest.fixture()
 def default_dataset():
     return Dataset(
         **{
@@ -630,8 +610,8 @@ def default_dataset():
     )
-@pytest.fixture
-def mock_dataset_worker(monkeypatch, mocker, mock_worker_run_api):
+@pytest.fixture()
+def mock_dataset_worker(monkeypatch, mocker, _mock_worker_run_api):
     monkeypatch.setenv("PONOS_TASK", "my_task")
     mocker.patch.object(sys, "argv", ["worker"])
@@ -644,7 +624,7 @@ def mock_dataset_worker(monkeypatch, mocker, mock_worker_run_api):
     return dataset_worker
-@pytest.fixture
+@pytest.fixture()
 def mock_dev_dataset_worker(mocker):
     mocker.patch.object(
         sys,
@@ -668,12 +648,12 @@ def mock_dev_dataset_worker(mocker):
     return dataset_worker
-@pytest.fixture
+@pytest.fixture()
 def default_artifact():
     return Artifact(
         **{
             "id": "artifact_id",
-            "path": "dataset_id.zstd",
+            "path": "dataset_id.tar.zst",
             "size": 42,
             "content_type": "application/zstd",
             "s3_put_url": None,

tests/test_base_worker.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import json
 import logging
 import sys
@@ -15,7 +14,7 @@ from arkindex_worker.worker.base import ExtrasDirNotFoundError
 from tests.conftest import FIXTURES_DIR
-def test_init_default_local_share(monkeypatch):
+def test_init_default_local_share():
     worker = BaseWorker()
     assert worker.work_dir == Path("~/.local/share/arkindex").expanduser()
@@ -29,7 +28,7 @@ def test_init_default_xdg_data_home(monkeypatch):
     assert str(worker.work_dir) == f"{path}/arkindex"
-def test_init_with_local_cache(monkeypatch):
+def test_init_with_local_cache():
     worker = BaseWorker(support_cache=True)
     assert worker.work_dir == Path("~/.local/share/arkindex").expanduser()
@@ -72,7 +71,8 @@ def test_init_var_worker_local_file(monkeypatch, tmp_path):
     config.unlink()
-def test_cli_default(mocker, mock_worker_run_api):
+@pytest.mark.usefixtures("_mock_worker_run_api")
+def test_cli_default(mocker):
     worker = BaseWorker()
     assert logger.level == logging.NOTSET
@@ -91,7 +91,8 @@ def test_cli_default(mocker, mock_worker_run_api):
     logger.setLevel(logging.NOTSET)
-def test_cli_arg_verbose_given(mocker, mock_worker_run_api):
+@pytest.mark.usefixtures("_mock_worker_run_api")
+def test_cli_arg_verbose_given(mocker):
     worker = BaseWorker()
     assert logger.level == logging.NOTSET
@@ -110,7 +111,8 @@ def test_cli_arg_verbose_given(mocker, mock_worker_run_api):
     logger.setLevel(logging.NOTSET)
-def test_cli_envvar_debug_given(mocker, monkeypatch, mock_worker_run_api):
+@pytest.mark.usefixtures("_mock_worker_run_api")
+def test_cli_envvar_debug_given(mocker, monkeypatch):
     worker = BaseWorker()
     assert logger.level == logging.NOTSET
@@ -129,7 +131,7 @@ def test_cli_envvar_debug_given(mocker, monkeypatch, mock_worker_run_api):
     logger.setLevel(logging.NOTSET)
-def test_configure_dev_mode(mocker, monkeypatch):
+def test_configure_dev_mode(mocker):
     """
     Configuring a worker in developer mode avoid retrieving process information
     """
@@ -145,7 +147,7 @@ def test_configure_dev_mode(mocker, monkeypatch):
     assert worker.user_configuration == {}
-def test_configure_worker_run(mocker, monkeypatch, responses, caplog):
+def test_configure_worker_run(mocker, responses, caplog):
     # Capture log messages
     caplog.set_level(logging.INFO)
@@ -214,9 +216,8 @@ def test_configure_worker_run(mocker, monkeypatch, responses, caplog):
     assert worker.user_configuration == {"a": "b"}
-def test_configure_worker_run_no_revision(
-    mocker, monkeypatch, mock_worker_run_no_revision_api, caplog
-):
+@pytest.mark.usefixtures("_mock_worker_run_no_revision_api")
+def test_configure_worker_run_no_revision(mocker, caplog):
     worker = BaseWorker()
     mocker.patch.object(sys, "argv", ["worker"])
@@ -234,11 +235,7 @@ def test_configure_worker_run_no_revision(
     ]
-def test_configure_user_configuration_defaults(
-    mocker,
-    monkeypatch,
-    responses,
-):
+def test_configure_user_configuration_defaults(mocker, responses):
     worker = BaseWorker()
     mocker.patch.object(sys, "argv")
     worker.args = worker.parser.parse_args()
@@ -300,8 +297,8 @@ def test_configure_user_configuration_defaults(
     }
-@pytest.mark.parametrize("debug", (True, False))
-def test_configure_user_config_debug(mocker, monkeypatch, responses, debug):
+@pytest.mark.parametrize("debug", [True, False])
+def test_configure_user_config_debug(mocker, responses, debug):
     worker = BaseWorker()
     mocker.patch.object(sys, "argv", ["worker"])
     assert logger.level == logging.NOTSET
@@ -347,7 +344,7 @@ def test_configure_user_config_debug(mocker, monkeypatch, responses, debug):
     logger.setLevel(logging.NOTSET)
-def test_configure_worker_run_missing_conf(mocker, monkeypatch, responses):
+def test_configure_worker_run_missing_conf(mocker, responses):
     worker = BaseWorker()
     mocker.patch.object(sys, "argv", ["worker"])
@@ -392,7 +389,7 @@ def test_configure_worker_run_missing_conf(mocker, monkeypatch, responses):
     assert worker.user_configuration == {}
-def test_configure_worker_run_no_worker_run_conf(mocker, monkeypatch, responses):
+def test_configure_worker_run_no_worker_run_conf(mocker, responses):
     """
     No configuration is provided but should not crash
     """
@@ -434,7 +431,7 @@ def test_configure_worker_run_no_worker_run_conf(mocker, monkeypatch, responses)
     assert worker.user_configuration == {}
-def test_configure_load_model_configuration(mocker, monkeypatch, responses):
+def test_configure_load_model_configuration(mocker, responses):
     worker = BaseWorker()
     mocker.patch.object(sys, "argv", ["worker"])
     payload = {
@@ -454,7 +451,10 @@ def test_configure_load_model_configuration(mocker, monkeypatch, responses):
         "configuration": None,
         "model_version": {
             "id": "12341234-1234-1234-1234-123412341234",
-            "name": "Model version 1337",
+            "model": {
+                "id": "43214321-4321-4321-4321-432143214321",
+                "name": "Model 1337",
+            },
             "configuration": {
                 "param1": "value1",
                 "param2": 2,
@@ -489,6 +489,10 @@ def test_configure_load_model_configuration(mocker, monkeypatch, responses):
         "param3": None,
     }
     assert worker.model_version_id == "12341234-1234-1234-1234-123412341234"
+    assert worker.model_details == {
+        "id": "43214321-4321-4321-4321-432143214321",
+        "name": "Model 1337",
+    }
 def test_load_missing_secret():
@@ -578,7 +582,7 @@ def test_load_local_secret(monkeypatch, tmp_path):
     secret.write_text("this is a local secret value", encoding="utf-8")
     # Mock GPG decryption
-    class GpgDecrypt(object):
+    class GpgDecrypt:
         def __init__(self, fd):
             self.ok = True
             self.data = fd.read()
@@ -631,15 +635,15 @@ def test_find_extras_directory_from_config(monkeypatch):
 @pytest.mark.parametrize(
-    "extras_path, exists, error",
-    (
-        [
+    ("extras_path", "exists", "error"),
+    [
+        (
             None,
             True,
             "No path to the directory for extra files was provided. Please provide extras_dir either through configuration or as CLI argument.",
-        ],
-        ["extra_files", False, "The path extra_files does not link to any directory"],
-    ),
+        ),
+        ("extra_files", False, "The path extra_files does not link to any directory"),
+    ],
 )
 def test_find_extras_directory_not_found(monkeypatch, extras_path, exists, error):
     if extras_path:
@@ -666,7 +670,9 @@ def test_find_parents_file_paths(responses, mock_base_worker_with_cache, tmp_pat
     )
     filename = Path("my_file.txt")
-    for parent_id, content in zip(["first", "third"], ["Some text", "Other text"]):
+    for parent_id, content in zip(
+        ["first", "third"], ["Some text", "Other text"], strict=True
+    ):
         (tmp_path / parent_id).mkdir()
         file_path = tmp_path / parent_id / filename
         with file_path.open("w", encoding="utf-8") as f:
@@ -697,7 +703,7 @@ def test_extract_parent_archives(tmp_path):
     ]
     worker.task_data_dir = FIXTURES_DIR / "extract_parent_archives"
-    worker.extract_parent_archives("arkindex_data.zstd", tmp_path)
+    worker.extract_parent_archives("arkindex_data.tar.zst", tmp_path)
     extracted_files = [
         # Test
@@ -742,7 +748,7 @@ def test_corpus_id_not_set_read_only_mode(
     with pytest.raises(
         Exception, match="Missing ARKINDEX_CORPUS_ID environment variable"
     ):
-        mock_elements_worker_read_only.corpus_id
+        _ = mock_elements_worker_read_only.corpus_id
 def test_corpus_id_set_read_only_mode(

tests/test_cache.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 from pathlib import Path
 from uuid import UUID
@@ -31,22 +30,20 @@ def test_init(tmp_path):
 def test_create_tables_existing_table(tmp_path):
-    db_path = f"{tmp_path}/db.sqlite"
+    db_path = tmp_path / "db.sqlite"
     # Create the tables once…
     init_cache_db(db_path)
     create_tables()
     db.close()
-    with open(db_path, "rb") as before_file:
-        before = before_file.read()
+    before = db_path.read_bytes()
     # Create them again
     init_cache_db(db_path)
     create_tables()
-    with open(db_path, "rb") as after_file:
-        after = after_file.read()
+    after = db_path.read_bytes()
     assert before == after, "Existing table structure was modified"
@@ -144,7 +141,17 @@ def test_check_version_same_version(tmp_path):
 @pytest.mark.parametrize(
-    "image_width,image_height,polygon_x,polygon_y,polygon_width,polygon_height,max_width,max_height,expected_url",
+    (
+        "image_width",
+        "image_height",
+        "polygon_x",
+        "polygon_y",
+        "polygon_width",
+        "polygon_height",
+        "max_width",
+        "max_height",
+        "expected_url",
+    ),
     [
         # No max_size: no resize
         (

arkindex-base-worker 0.3.5rc6__py3-none-any.whl → 0.3.6rc2__py3-none-any.whl

arkindex-base-worker 0.3.5rc6__py3-none-any.whl → 0.3.6rc2__py3-none-any.whl