arkindex-base-worker 0.3.7rc10__py3-none-any.whl → 0.4.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.3.7rc10.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/METADATA +10 -16
- arkindex_base_worker-0.4.0a1.dist-info/RECORD +51 -0
- arkindex_worker/models.py +6 -0
- arkindex_worker/utils.py +3 -4
- arkindex_worker/worker/__init__.py +23 -2
- arkindex_worker/worker/base.py +3 -23
- arkindex_worker/worker/corpus.py +69 -0
- arkindex_worker/worker/image.py +21 -0
- arkindex_worker/worker/training.py +12 -0
- tests/__init__.py +8 -0
- tests/conftest.py +4 -8
- tests/test_base_worker.py +8 -9
- tests/test_dataset_worker.py +14 -41
- tests/test_elements_worker/test_classifications.py +22 -39
- tests/test_elements_worker/test_cli.py +3 -11
- tests/test_elements_worker/test_corpus.py +137 -0
- tests/test_elements_worker/test_dataset.py +6 -11
- tests/test_elements_worker/test_elements.py +106 -85
- tests/test_elements_worker/test_entities.py +15 -39
- tests/test_elements_worker/test_image.py +65 -0
- tests/test_elements_worker/test_metadata.py +6 -40
- tests/test_elements_worker/test_task.py +7 -17
- tests/test_elements_worker/test_training.py +35 -0
- tests/test_elements_worker/test_transcriptions.py +10 -27
- tests/test_elements_worker/test_worker.py +2 -1
- tests/test_image.py +3 -5
- arkindex_base_worker-0.3.7rc10.dist-info/RECORD +0 -47
- {arkindex_base_worker-0.3.7rc10.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/LICENSE +0 -0
- {arkindex_base_worker-0.3.7rc10.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/WHEEL +0 -0
- {arkindex_base_worker-0.3.7rc10.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/top_level.txt +0 -0
{arkindex_base_worker-0.3.7rc10.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: arkindex-base-worker
-Version: 0.3.7rc10
+Version: 0.4.0a1
 Summary: Base Worker to easily build Arkindex ML workflows
 Author-email: Teklia <contact@teklia.com>
 Maintainer-email: Teklia <contact@teklia.com>
@@ -37,28 +37,22 @@ Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3 :: Only
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Classifier: Topic :: Text Processing :: Linguistic
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: peewee
-Requires-Dist: Pillow ==10.
-Requires-Dist: pymdown-extensions ==10.7
+Requires-Dist: peewee ~=3.17
+Requires-Dist: Pillow ==10.3.0
 Requires-Dist: python-gnupg ==0.5.2
 Requires-Dist: shapely ==2.0.3
-Requires-Dist: teklia-toolbox ==0.1.
+Requires-Dist: teklia-toolbox ==0.1.5
 Requires-Dist: zstandard ==0.22.0
 Provides-Extra: docs
-Requires-Dist: black ==24.
-Requires-Dist:
-Requires-Dist:
-Requires-Dist: mkdocs-material ==9.5.10 ; extra == 'docs'
-Requires-Dist: mkdocstrings ==0.24.0 ; extra == 'docs'
-Requires-Dist: mkdocstrings-python ==1.8.0 ; extra == 'docs'
-Requires-Dist: recommonmark ==0.7.1 ; extra == 'docs'
+Requires-Dist: black ==24.4.0 ; extra == 'docs'
+Requires-Dist: mkdocs-material ==9.5.17 ; extra == 'docs'
+Requires-Dist: mkdocstrings-python ==1.9.2 ; extra == 'docs'
 Provides-Extra: tests
-Requires-Dist: pytest ==8.
-Requires-Dist: pytest-mock ==3.
+Requires-Dist: pytest ==8.1.1 ; extra == 'tests'
+Requires-Dist: pytest-mock ==3.14.0 ; extra == 'tests'
 Requires-Dist: pytest-responses ==0.5.1 ; extra == 'tests'
 
 # Arkindex base Worker
@@ -73,7 +67,7 @@ The [documentation](https://workers.arkindex.org/) is made with [Material for Mk
 
 ## Create a new worker using our template
 
-```
+```shell
 pip install --user cookiecutter
 cookiecutter git@gitlab.teklia.com:workers/base-worker.git
 ```
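The `docs` and `tests` extras above are opt-in dependency groups. As a quick illustration of how a consumer would pull one in (the exact command is ours, not part of the package):

```shell
# Install the pre-release together with its test dependencies
pip install "arkindex-base-worker[tests]==0.4.0a1"
```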
arkindex_base_worker-0.4.0a1.dist-info/RECORD
ADDED
@@ -0,0 +1,51 @@
+arkindex_worker/__init__.py,sha256=OlgCtTC9MaWeejviY0a3iQpALcRQGMVArFVVYwTF6I8,162
+arkindex_worker/cache.py,sha256=FTlB0coXofn5zTNRTcVIvh709mcw4a1bPGqkwWjKs3w,11248
+arkindex_worker/image.py,sha256=5ymIGaTm2D7Sp2YYQkbuheuGnx5VJo0_AzYAEIvNGhs,14267
+arkindex_worker/models.py,sha256=bPQzGZNs5a6z6DEcygsa8T33VOqPlMUbwKzHqlKzwbw,9923
+arkindex_worker/utils.py,sha256=KXWIACda7D3IpdToaAplLoAgnCK8bKWw7aWUyq-IWUA,7211
+arkindex_worker/worker/__init__.py,sha256=3sJ_EPB7yG-kPfgunbm2B7B7DzoeOi5ZNpQwC_3QuZ0,19429
+arkindex_worker/worker/base.py,sha256=c9u37W1BNHt5RoQV2ZrYUYv6tBs-CjiSgUAAg7p7GA0,18876
+arkindex_worker/worker/classification.py,sha256=JVz-6YEeuavOy7zGfQi4nE_wpj9hwMUZDXTem-hXQY8,10328
+arkindex_worker/worker/corpus.py,sha256=ZHAAYE4PRPXqqaZm71wjrsxYETFqU6TAz-3VYgIXzac,1794
+arkindex_worker/worker/dataset.py,sha256=roX2IMMNA-icteTtRADiFSZiZSRPClqS62ZPJm9s2JI,2923
+arkindex_worker/worker/element.py,sha256=AWK3YJSHWy3j4ajntJloi_2X4zxsgXZ6c6dzphgq3OI,33848
+arkindex_worker/worker/entity.py,sha256=suhycfikC9oTPEWmX48_cnvFEw-Wu5zBA8n_00K4KUk,14714
+arkindex_worker/worker/image.py,sha256=t_Az6IGnj0EZyvcA4XxfPikOUjn_pztgsyxTkFZhaXU,621
+arkindex_worker/worker/metadata.py,sha256=Bouuc_JaXogKykVXOTKDVP3tX--OUQeHoazxIGrGrJI,6702
+arkindex_worker/worker/task.py,sha256=cz3wJNPgogZv1lm_3lm7WScitQtYQtL6H6I7Xokq208,1475
+arkindex_worker/worker/training.py,sha256=hkwCBjVE4bByXzHUmCZF73Bl5JxARdXWjYgFE6ydAT0,10749
+arkindex_worker/worker/transcription.py,sha256=6R7ofcGnNqX4rjT0kRKIE-G9FHq2TJ1tfztNM5sTqYE,20464
+arkindex_worker/worker/version.py,sha256=cs2pdlDxpKRO2Oldvcu54w-D_DQhf1cdeEt4tKX_QYs,1927
+hooks/pre_gen_project.py,sha256=xQJERv3vv9VzIqcBHI281eeWLWREXUF4mMw7PvJHHXM,269
+tests/__init__.py,sha256=6aeTMHf4q_dKY4jIZWg1KT70VKaLvVlzCxh-Uu_cWiQ,241
+tests/conftest.py,sha256=-ZQTV4rg7TgW84-5Ioqndqv8byNILfDOpyUt8wecEiI,21967
+tests/test_base_worker.py,sha256=qG45O3nPbASXN5a5RadXU1BAXn3EIaTK6Hvjj3s4Ozs,24292
+tests/test_cache.py,sha256=ii0gyr0DrG7ChEs7pmT8hMdSguAOAcCze4bRMiFQxuk,10640
+tests/test_dataset_worker.py,sha256=d9HG36qnO5HXu9vQ0UTBvdTSRR21FVq1FNoXM-vZbPk,22105
+tests/test_element.py,sha256=2G9M15TLxQRmvrWM9Kw2ucnElh4kSv_oF_5FYwwAxTY,13181
+tests/test_image.py,sha256=Fs9vKYgQ7mEFylbzI4YIO_JyOLeAcs-WxUXpzewxCd8,16188
+tests/test_merge.py,sha256=Q4zCbtZbe0wBfqE56gvAD06c6pDuhqnjKaioFqIgAQw,8331
+tests/test_utils.py,sha256=vpeHMeL7bJQonv5ZEbJmlJikqVKn5VWlVEbvmYFzDYA,1650
+tests/test_elements_worker/__init__.py,sha256=Fh4nkbbyJSMv_VtjQxnWrOqTnxXaaWI8S9WU0VrzCHs,179
+tests/test_elements_worker/test_classifications.py,sha256=DYRKhPpplFp144GCXKyFG1hz4Ra9vk5FiAN6dhfMP6k,25511
+tests/test_elements_worker/test_cli.py,sha256=a23i1pUDbXi23MUtbWwGEcLLrmc_YlrbDgOG3h66wLM,2620
+tests/test_elements_worker/test_corpus.py,sha256=c_LUHvkJIYgk_wXF06VQPNOoWfiZ06XpjOXrJ7MRiBc,4479
+tests/test_elements_worker/test_dataset.py,sha256=lSXqubhg1EEq2Y2goE8Y2RYaqIpM9Iejq6fGNW2BczU,11411
+tests/test_elements_worker/test_elements.py,sha256=2_kdeo99biCH3Uez6HB8ltS_iIizZ7ir5uOkFjIXfjM,84812
+tests/test_elements_worker/test_entities.py,sha256=jirb_IKAMqMhwxeDgjO-rsr1fTP9GdXwuyhncUjCJFM,33494
+tests/test_elements_worker/test_image.py,sha256=_E3UGdDOwTo1MW5KMS81PrdeSPBPWinWYoQPNy2F9Ro,2077
+tests/test_elements_worker/test_metadata.py,sha256=-cZhlVAh4o2uRnHz8fPf_thfavRnJrtJYN_p4BmHISU,17566
+tests/test_elements_worker/test_task.py,sha256=7Sr3fbjdgWUXJUhJEiC9CwnbhQIQX3rCInmHMIrmA38,5573
+tests/test_elements_worker/test_training.py,sha256=wVYWdMdeSA6T2XyhH5AJJNGemYq3LOViiZvj0dblACA,9468
+tests/test_elements_worker/test_transcriptions.py,sha256=7HDkIW8IDK7pKAfpSdAPB7YOyKyeBJTn2_alvVK46SA,72411
+tests/test_elements_worker/test_worker.py,sha256=AwdP8uSXNQ_SJavXxJV2s3_J3OiCafShVjMV1dgt4xo,17162
+worker-demo/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+worker-demo/tests/conftest.py,sha256=XzNMNeg6pmABUAH8jN6eZTlZSFGLYjS3-DTXjiRN6Yc,1002
+worker-demo/tests/test_worker.py,sha256=3DLd4NRK4bfyatG5P_PK4k9P9tJHx9XQq5_ryFEEFVg,304
+worker-demo/worker_demo/__init__.py,sha256=2BPomV8ZMNf3YXJgloatKeHQCE6QOkwmsHGkO6MkQuM,125
+worker-demo/worker_demo/worker.py,sha256=Rt-DjWa5iBP08k58NDZMfeyPuFbtNcbX6nc5jFX7GNo,440
+arkindex_base_worker-0.4.0a1.dist-info/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
+arkindex_base_worker-0.4.0a1.dist-info/METADATA,sha256=PBTlbhWTCvvkkcGqQew6yvJIdncf9mKZ71yI_QSX2iM,3269
+arkindex_base_worker-0.4.0a1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+arkindex_base_worker-0.4.0a1.dist-info/top_level.txt,sha256=58NuslgxQC2vT4DiqZEgO4JqJRrYa2yeNI9QvkbfGQU,40
+arkindex_base_worker-0.4.0a1.dist-info/RECORD,,
arkindex_worker/models.py
CHANGED
arkindex_worker/utils.py
CHANGED
@@ -5,7 +5,6 @@ import tarfile
 import tempfile
 from pathlib import Path
 
-import zstandard
 import zstandard as zstd
 
 logger = logging.getLogger(__name__)
@@ -38,7 +37,7 @@ def decompress_zst_archive(compressed_archive: Path) -> tuple[int, Path]:
     :param compressed_archive: Path to the target ZST-compressed archive
     :return: File descriptor and path to the uncompressed tar archive
     """
-    dctx = zstandard.ZstdDecompressor()
+    dctx = zstd.ZstdDecompressor()
     archive_fd, archive_path = tempfile.mkstemp(suffix=".tar")
     archive_path = Path(archive_path)
 
@@ -50,7 +49,7 @@ def decompress_zst_archive(compressed_archive: Path) -> tuple[int, Path]:
         ):
             dctx.copy_stream(compressed, decompressed)
         logger.debug(f"Successfully uncompressed archive {compressed_archive}")
-    except zstandard.ZstdError as e:
+    except zstd.ZstdError as e:
        raise Exception(f"Couldn't uncompressed archive: {e}") from e
 
     return archive_fd, archive_path
@@ -129,7 +128,7 @@ def zstd_compress(
                archive_hasher.update(compressed_chunk)
                archive_file.write(compressed_chunk)
        logger.debug(f"Successfully compressed {source}")
-    except zstandard.ZstdError as e:
+    except zstd.ZstdError as e:
        raise Exception(f"Couldn't compress archive: {e}") from e
    return file_d, destination, archive_hasher.hexdigest()
 
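The `zstandard` package is now referenced only through its `zstd` alias. A minimal self-contained sketch of the decompression pattern used by `decompress_zst_archive`, assuming a `.tar.zst` archive on disk (the helper name and paths are ours):

```python
import tempfile
from pathlib import Path

import zstandard as zstd


def decompress_to_tar(compressed_archive: Path) -> Path:
    """Stream-decompress a .tar.zst archive into a temporary .tar file."""
    dctx = zstd.ZstdDecompressor()
    _, archive_path = tempfile.mkstemp(suffix=".tar")
    try:
        with compressed_archive.open("rb") as compressed, open(
            archive_path, "wb"
        ) as decompressed:
            # copy_stream reads compressed bytes and writes decompressed output
            dctx.copy_stream(compressed, decompressed)
    except zstd.ZstdError as e:
        raise Exception(f"Couldn't uncompress archive: {e}") from e
    return Path(archive_path)
```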
arkindex_worker/worker/__init__.py
CHANGED
@@ -19,9 +19,11 @@ from arkindex_worker.cache import CachedElement
 from arkindex_worker.models import Dataset, Element, Set
 from arkindex_worker.worker.base import BaseWorker
 from arkindex_worker.worker.classification import ClassificationMixin
+from arkindex_worker.worker.corpus import CorpusMixin
 from arkindex_worker.worker.dataset import DatasetMixin, DatasetState
 from arkindex_worker.worker.element import ElementMixin
 from arkindex_worker.worker.entity import EntityMixin
+from arkindex_worker.worker.image import ImageMixin
 from arkindex_worker.worker.metadata import MetaDataMixin, MetaType  # noqa: F401
 from arkindex_worker.worker.task import TaskMixin
 from arkindex_worker.worker.transcription import TranscriptionMixin
@@ -57,11 +59,13 @@ class ActivityState(Enum):
 class ElementsWorker(
     BaseWorker,
     ClassificationMixin,
+    CorpusMixin,
     ElementMixin,
     TranscriptionMixin,
     WorkerVersionMixin,
     EntityMixin,
     MetaDataMixin,
+    ImageMixin,
 ):
     """
     Base class for ML workers that operate on Arkindex elements.
@@ -88,7 +92,7 @@ class ElementsWorker(
         )
         self.parser.add_argument(
             "--element",
-            type=uuid.UUID,
+            type=str,
             nargs="+",
             help="One or more Arkindex element ID",
         )
@@ -107,11 +111,23 @@
         the cache database when enabled.
 
         :return: An iterable of [CachedElement][arkindex_worker.cache.CachedElement] when cache support is enabled,
-
+            or a list of strings representing element IDs otherwise.
         """
         assert not (
             self.args.elements_list and self.args.element
         ), "elements-list and element CLI args shouldn't be both set"
+
+        def invalid_element_id(value: str) -> bool:
+            """
+            Return whether the ID of an element is a valid UUID or not
+            """
+            try:
+                uuid.UUID(value)
+            except Exception:
+                return True
+
+            return False
+
         out = []
 
         # Load from the cache when available
@@ -131,6 +147,11 @@ class ElementsWorker(
         elif self.args.element:
             out += self.args.element
 
+        invalid_element_ids = list(filter(invalid_element_id, out))
+        assert (
+            not invalid_element_ids
+        ), f"These element IDs are invalid: {', '.join(invalid_element_ids)}"
+
         return out
 
     @property
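`--element` values are now accepted as plain strings and validated by hand, which lets the worker report every invalid ID at once instead of failing on the first one at argparse time. The same check, as a standalone sketch with made-up IDs:

```python
import uuid


def invalid_element_id(value: str) -> bool:
    # Valid element IDs are UUIDs; anything unparseable is flagged
    try:
        uuid.UUID(value)
    except Exception:
        return True
    return False


ids = ["cafecafe-cafe-cafe-cafe-cafecafecafe", "not-a-uuid"]
assert list(filter(invalid_element_id, ids)) == ["not-a-uuid"]
```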
arkindex_worker/worker/base.py
CHANGED
@@ -13,14 +13,8 @@ from tempfile import mkdtemp
 import gnupg
 import yaml
 from apistar.exceptions import ErrorResponse
-from tenacity import (
-    before_sleep_log,
-    retry,
-    retry_if_exception,
-    stop_after_attempt,
-    wait_exponential,
-)
 
+from arkindex import options_from_env
 from arkindex_worker import logger
 from arkindex_worker.cache import (
     check_version,
@@ -30,7 +24,7 @@ from arkindex_worker.cache import (
     merge_parents_cache,
 )
 from arkindex_worker.utils import close_delete_file, extract_tar_zst_archive
-from teklia_toolbox.requests import
+from teklia_toolbox.requests import get_arkindex_client
 
 
 class ExtrasDirNotFoundError(Exception):
@@ -185,7 +179,7 @@ class BaseWorker:
         Create an ArkindexClient to make API requests towards Arkindex instances.
         """
         # Build Arkindex API client from environment variables
-        self.api_client =
+        self.api_client = get_arkindex_client(**options_from_env())
         logger.debug(f"Setup Arkindex API client on {self.api_client.document.url}")
 
     def configure_for_developers(self):
@@ -477,23 +471,9 @@
         # Clean up
         shutil.rmtree(base_extracted_path)
 
-    @retry(
-        retry=retry_if_exception(_is_500_error),
-        wait=wait_exponential(multiplier=2, min=3),
-        reraise=True,
-        stop=stop_after_attempt(5),
-        before_sleep=before_sleep_log(logger, logging.INFO),
-    )
     def request(self, *args, **kwargs):
         """
         Wrapper around the ``ArkindexClient.request`` method.
-
-        The API call will be retried up to 5 times in case of HTTP 5xx errors,
-        with an exponential sleep time of 3, 4, 8 and 16 seconds between calls.
-        If the 5th call still causes an HTTP 5xx error, the exception is re-raised
-        and the caller should catch it.
-
-        Log messages are displayed when an HTTP 5xx error occurs, before waiting for the next call.
         """
         return self.api_client.request(*args, **kwargs)
 
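The retry decorator removed from `request` relied on tenacity; retries presumably now live behind `teklia_toolbox`'s `get_arkindex_client`. For reference, a self-contained sketch of the removed pattern, where the `_is_500_error` predicate is reimplemented as an assumption:

```python
import logging

from tenacity import (
    before_sleep_log,
    retry,
    retry_if_exception,
    stop_after_attempt,
    wait_exponential,
)

logger = logging.getLogger(__name__)


def _is_500_error(exc: BaseException) -> bool:
    # Assumed predicate: retry only on HTTP 5xx error responses
    return 500 <= getattr(exc, "status_code", 0) < 600


@retry(
    retry=retry_if_exception(_is_500_error),
    wait=wait_exponential(multiplier=2, min=3),
    reraise=True,
    stop=stop_after_attempt(5),
    before_sleep=before_sleep_log(logger, logging.INFO),
)
def flaky_request():
    ...  # e.g. api_client.request(*args, **kwargs)
```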
arkindex_worker/worker/corpus.py
ADDED
@@ -0,0 +1,69 @@
+"""
+BaseWorker methods for corpora.
+"""
+
+from enum import Enum
+from operator import itemgetter
+from tempfile import _TemporaryFileWrapper
+
+from arkindex_worker import logger
+
+
+class CorpusExportState(Enum):
+    """
+    State of a corpus export.
+    """
+
+    Created = "created"
+    """
+    The corpus export is created, awaiting its processing.
+    """
+
+    Running = "running"
+    """
+    The corpus export is being built.
+    """
+
+    Failed = "failed"
+    """
+    The corpus export failed.
+    """
+
+    Done = "done"
+    """
+    The corpus export ended in success.
+    """
+
+
+class CorpusMixin:
+    def download_latest_export(self) -> _TemporaryFileWrapper:
+        """
+        Download the latest export in `done` state of the current corpus.
+
+        :returns: The downloaded export stored in a temporary file.
+        """
+        # List all exports on the corpus
+        exports = self.api_client.paginate("ListExports", id=self.corpus_id)
+
+        # Find the latest that is in "done" state
+        exports: list[dict] = sorted(
+            list(
+                filter(
+                    lambda export: export["state"] == CorpusExportState.Done.value,
+                    exports,
+                )
+            ),
+            key=itemgetter("updated"),
+            reverse=True,
+        )
+        assert (
+            len(exports) > 0
+        ), f'No available exports found for the corpus ({self.corpus_id}) with state "{CorpusExportState.Done.value.capitalize()}".'
+
+        # Download latest export
+        export_id: str = exports[0]["id"]
+        logger.info(f"Downloading export ({export_id})...")
+        export: _TemporaryFileWrapper = self.request("DownloadExport", id=export_id)
+        logger.info(f"Downloaded export ({export_id}) @ `{export.name}`")
+
+        return export
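Since `ElementsWorker` now mixes in `CorpusMixin`, any elements worker can fetch the latest finished export of its corpus. A hedged usage sketch — the worker class and its body are ours, not part of the package:

```python
from arkindex_worker.models import Element
from arkindex_worker.worker import ElementsWorker


class DemoWorker(ElementsWorker):
    def process_element(self, element: Element):
        # Downloads the most recent export in "done" state for self.corpus_id
        export = self.download_latest_export()
        # The export is a named temporary file; .name is its on-disk path
        print(f"Corpus export for {element.id} available at {export.name}")
```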
arkindex_worker/worker/image.py
ADDED
@@ -0,0 +1,21 @@
+"""
+ElementsWorker methods for images.
+"""
+
+from arkindex_worker.models import Image
+
+
+class ImageMixin:
+    def create_iiif_url(self, url: str) -> Image:
+        """
+        Create an image from an existing IIIF image by URL.
+        The URL should be of the image's identifier, not of its Image Information request (`/info.json`).
+
+        :param url: URL of the image.
+        :returns: The created image.
+        """
+        assert url and isinstance(
+            url, str
+        ), "url shouldn't be null and should be of type str"
+
+        return Image(self.api_client.request("CreateIIIFURL", body={"url": url}))
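`create_iiif_url` registers an existing IIIF image on Arkindex from inside a worker. A small hedged example; the URL is made up, and we assume the `CreateIIIFURL` response carries the usual image fields:

```python
# Inside an ElementsWorker method; ImageMixin is now part of ElementsWorker
image = self.create_iiif_url("https://iiif.example.com/iiif/2/some-image")
# `image` wraps the CreateIIIFURL API response
print(image["id"])
```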
arkindex_worker/worker/training.py
CHANGED
@@ -83,6 +83,9 @@ class TrainingMixin:
 
     @property
     def is_finetuning(self) -> bool:
+        """
+        Whether or not this worker is fine-tuning an existing model version.
+        """
         return bool(self.model_version_id)
 
     @skip_if_read_only
@@ -280,8 +283,17 @@
                 },
             )
         except ErrorResponse as e:
+            # Temporary fix while waiting for `ValidateModelVersion` refactoring as it can
+            # return errors even when the model version is properly validated
+            if e.status_code in [403, 500]:
+                logger.warning(
+                    f'An error occurred while validating model version {self.model_version["id"]}, please check its status.'
+                )
+                return
+
             if e.status_code != 409:
                 raise e
+
             logger.warning(
                 f"An available model version exists with hash {hash}, using it instead of the pending version."
             )
tests/__init__.py
CHANGED
tests/conftest.py
CHANGED
@@ -26,11 +26,7 @@ from arkindex_worker.models import Artifact, Dataset, Set
 from arkindex_worker.worker import BaseWorker, DatasetWorker, ElementsWorker
 from arkindex_worker.worker.dataset import DatasetState
 from arkindex_worker.worker.transcription import TextOrientation
-
-FIXTURES_DIR = Path(__file__).resolve().parent / "data"
-SAMPLES_DIR = Path(__file__).resolve().parent / "samples"
-
-PROCESS_ID = "cafecafe-cafe-cafe-cafe-cafecafecafe"
+from tests import CORPUS_ID, FIXTURES_DIR, PROCESS_ID, SAMPLES_DIR
 
 __yaml_cache = {}
 
@@ -93,7 +89,7 @@ def _setup_api(responses, monkeypatch, _cache_yaml):
 
     # Fallback to prod environment
     if schema_url is None:
-        schema_url = "https://arkindex.teklia.com/api/v1/openapi/?format=
+        schema_url = "https://arkindex.teklia.com/api/v1/openapi/?format=json"
     monkeypatch.setenv("ARKINDEX_API_SCHEMA_URL", schema_url)
 
     # Allow accessing remote API schemas
@@ -159,7 +155,7 @@ def _mock_worker_run_api(responses):
         "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
         "state": "running",
         "mode": "workers",
-        "corpus":
+        "corpus": CORPUS_ID,
         "use_cache": False,
         "activity_state": "ready",
         "model_id": None,
@@ -226,7 +222,7 @@ def _mock_worker_run_no_revision_api(responses):
         "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
         "state": "running",
         "mode": "workers",
-        "corpus":
+        "corpus": CORPUS_ID,
         "use_cache": False,
         "activity_state": "ready",
         "model_id": None,
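The fixture constants previously defined in `tests/conftest.py` now come from the new `tests/__init__.py` (+8 lines per the summary, diff not shown). A plausible reconstruction: `FIXTURES_DIR`, `SAMPLES_DIR` and `PROCESS_ID` are copied from the removed conftest lines, while the `CORPUS_ID` value is an assumption:

```python
from pathlib import Path

FIXTURES_DIR = Path(__file__).resolve().parent / "data"
SAMPLES_DIR = Path(__file__).resolve().parent / "samples"

CORPUS_ID = "11111111-1111-1111-1111-111111111111"  # hypothetical value
PROCESS_ID = "cafecafe-cafe-cafe-cafe-cafecafecafe"
```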
tests/test_base_worker.py
CHANGED
@@ -11,7 +11,7 @@ from arkindex.mock import MockApiClient
 from arkindex_worker import logger
 from arkindex_worker.worker import BaseWorker, ElementsWorker
 from arkindex_worker.worker.base import ExtrasDirNotFoundError
-from tests
+from tests import CORPUS_ID, FIXTURES_DIR
 
 
 def test_init_default_local_share():
@@ -178,7 +178,7 @@ def test_configure_worker_run(mocker, responses, caplog):
         "model_version": None,
         "process": {
             "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
-            "corpus":
+            "corpus": CORPUS_ID,
         },
         "summary": "Worker Fake worker @ 123412",
     }
@@ -270,7 +270,7 @@ def test_configure_user_configuration_defaults(mocker, responses):
         "model_version": None,
         "process": {
             "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
-            "corpus":
+            "corpus": CORPUS_ID,
         },
         "summary": "Worker Fake worker @ 123412",
     }
@@ -319,7 +319,7 @@ def test_configure_user_config_debug(mocker, responses, debug):
         },
         "process": {
             "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
-            "corpus":
+            "corpus": CORPUS_ID,
         },
         "summary": "Worker Fake worker @ 123412",
     }
@@ -367,7 +367,7 @@ def test_configure_worker_run_missing_conf(mocker, responses):
         "configuration": {"id": "bbbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "name": "BBB"},
         "process": {
             "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
-            "corpus":
+            "corpus": CORPUS_ID,
         },
         "summary": "Worker Fake worker @ 123412",
     }
@@ -409,7 +409,7 @@ def test_configure_worker_run_no_worker_run_conf(mocker, responses):
         "configuration": None,
         "process": {
             "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
-            "corpus":
+            "corpus": CORPUS_ID,
         },
         "summary": "Worker Fake worker @ 123412",
     }
@@ -458,7 +458,7 @@ def test_configure_load_model_configuration(mocker, responses):
         },
         "process": {
             "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
-            "corpus":
+            "corpus": CORPUS_ID,
         },
         "summary": "Worker Fake worker @ 123412",
     }
@@ -669,8 +669,7 @@ def test_find_parents_file_paths(responses, mock_base_worker_with_cache, tmp_pat
     ):
         (tmp_path / parent_id).mkdir()
         file_path = tmp_path / parent_id / filename
-
-            f.write(content)
+        file_path.write_text(content)
 
     # Configure worker with a specific data directory
     mock_base_worker_with_cache.task_data_dir = tmp_path
tests/test_dataset_worker.py
CHANGED
@@ -8,7 +8,7 @@ from apistar.exceptions import ErrorResponse
 from arkindex_worker.models import Dataset, Set
 from arkindex_worker.worker import MissingDatasetArchive, check_dataset_set
 from arkindex_worker.worker.dataset import DatasetState
-from tests
+from tests import FIXTURES_DIR, PROCESS_ID
 from tests.test_elements_worker import BASE_API_CALLS
 
 RANDOM_UUID = uuid.uuid4()
@@ -63,22 +63,17 @@ def test_download_dataset_artifact_list_api_error(
     responses.add(
         responses.GET,
         f"http://testserver/api/v1/task/{task_id}/artifacts/",
-        status=500,
+        status=418,
     )
 
     with pytest.raises(ErrorResponse):
         mock_dataset_worker.download_dataset_artifact(default_dataset)
 
-    assert len(responses.calls) == len(BASE_API_CALLS) + 5
+    assert len(responses.calls) == len(BASE_API_CALLS) + 1
     assert [
         (call.request.method, call.request.url) for call in responses.calls
     ] == BASE_API_CALLS + [
-        # The API call is retried 5 times
-        ("GET", f"http://testserver/api/v1/task/{task_id}/artifacts/"),
-        ("GET", f"http://testserver/api/v1/task/{task_id}/artifacts/"),
-        ("GET", f"http://testserver/api/v1/task/{task_id}/artifacts/"),
-        ("GET", f"http://testserver/api/v1/task/{task_id}/artifacts/"),
-        ("GET", f"http://testserver/api/v1/task/{task_id}/artifacts/"),
+        ("GET", f"http://testserver/api/v1/task/{task_id}/artifacts/")
     ]
 
 
@@ -116,22 +111,17 @@ def test_download_dataset_artifact_download_api_error(
     responses.add(
         responses.GET,
         f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.tar.zst",
-        status=500,
+        status=418,
     )
 
     with pytest.raises(ErrorResponse):
         mock_dataset_worker.download_dataset_artifact(default_dataset)
 
-    assert len(responses.calls) == len(BASE_API_CALLS) + 6
+    assert len(responses.calls) == len(BASE_API_CALLS) + 2
     assert [
         (call.request.method, call.request.url) for call in responses.calls
     ] == BASE_API_CALLS + [
         ("GET", f"http://testserver/api/v1/task/{task_id}/artifacts/"),
-        # The API call is retried 5 times
-        ("GET", f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.tar.zst"),
-        ("GET", f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.tar.zst"),
-        ("GET", f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.tar.zst"),
-        ("GET", f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.tar.zst"),
         ("GET", f"http://testserver/api/v1/task/{task_id}/artifact/dataset_id.tar.zst"),
     ]
 
@@ -284,7 +274,7 @@ def test_list_sets_api_error(responses, mock_dataset_worker):
     responses.add(
         responses.GET,
         f"http://testserver/api/v1/process/{PROCESS_ID}/sets/",
-        status=500,
+        status=418,
     )
 
     with pytest.raises(
@@ -393,20 +383,15 @@ def test_list_sets_retrieve_dataset_api_error(
     responses.add(
         responses.GET,
         f"http://testserver/api/v1/datasets/{default_dataset.id}/",
-        status=500,
+        status=418,
     )
 
     with pytest.raises(ErrorResponse):
         next(mock_dev_dataset_worker.list_sets())
 
-    assert len(responses.calls) == 5
+    assert len(responses.calls) == 1
     assert [(call.request.method, call.request.url) for call in responses.calls] == [
-        # The API call is retried 5 times
-        ("GET", f"http://testserver/api/v1/datasets/{default_dataset.id}/"),
-        ("GET", f"http://testserver/api/v1/datasets/{default_dataset.id}/"),
-        ("GET", f"http://testserver/api/v1/datasets/{default_dataset.id}/"),
-        ("GET", f"http://testserver/api/v1/datasets/{default_dataset.id}/"),
-        ("GET", f"http://testserver/api/v1/datasets/{default_dataset.id}/"),
+        ("GET", f"http://testserver/api/v1/datasets/{default_dataset.id}/")
    ]
 
 
@@ -494,22 +479,17 @@ def test_run_download_dataset_artifact_api_error(
     responses.add(
         responses.GET,
         f"http://testserver/api/v1/task/{default_dataset.task_id}/artifacts/",
-        status=500,
+        status=418,
     )
 
     with pytest.raises(SystemExit):
         mock_dataset_worker.run()
 
-    assert len(responses.calls) == len(BASE_API_CALLS) * 2 + 5
+    assert len(responses.calls) == len(BASE_API_CALLS) * 2 + 1
     assert [
         (call.request.method, call.request.url) for call in responses.calls
     ] == BASE_API_CALLS * 2 + [
-        # The API call is retried 5 times
-        ("GET", f"http://testserver/api/v1/task/{default_dataset.task_id}/artifacts/"),
-        ("GET", f"http://testserver/api/v1/task/{default_dataset.task_id}/artifacts/"),
-        ("GET", f"http://testserver/api/v1/task/{default_dataset.task_id}/artifacts/"),
-        ("GET", f"http://testserver/api/v1/task/{default_dataset.task_id}/artifacts/"),
-        ("GET", f"http://testserver/api/v1/task/{default_dataset.task_id}/artifacts/"),
+        ("GET", f"http://testserver/api/v1/task/{default_dataset.task_id}/artifacts/")
     ]
 
     assert [(level, message) for _, level, message in caplog.record_tuples] == [
@@ -519,16 +499,9 @@
             "Retrieving data for Set (train) from Dataset (dataset_id) (1/1)",
         ),
         (logging.INFO, "Downloading artifact for Dataset (dataset_id)"),
-        *[
-            (
-                logging.INFO,
-                f"Retrying arkindex_worker.worker.base.BaseWorker.request in {retry} seconds as it raised ErrorResponse: .",
-            )
-            for retry in [3.0, 4.0, 8.0, 16.0]
-        ],
         (
             logging.WARNING,
-            "An API error occurred while processing Set (train) from Dataset (dataset_id): 500 Internal Server Error - None",
+            "An API error occurred while processing Set (train) from Dataset (dataset_id): 418 I'm a Teapot - None",
         ),
         (
             logging.ERROR,