arkindex-base-worker 0.3.6rc4__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arkindex_base_worker-0.3.7.dist-info/LICENSE +21 -0
- arkindex_base_worker-0.3.7.dist-info/METADATA +77 -0
- arkindex_base_worker-0.3.7.dist-info/RECORD +47 -0
- {arkindex_base_worker-0.3.6rc4.dist-info → arkindex_base_worker-0.3.7.dist-info}/WHEEL +1 -1
- {arkindex_base_worker-0.3.6rc4.dist-info → arkindex_base_worker-0.3.7.dist-info}/top_level.txt +2 -0
- arkindex_worker/cache.py +14 -0
- arkindex_worker/image.py +29 -19
- arkindex_worker/models.py +14 -2
- arkindex_worker/utils.py +17 -3
- arkindex_worker/worker/__init__.py +122 -125
- arkindex_worker/worker/base.py +24 -24
- arkindex_worker/worker/classification.py +18 -25
- arkindex_worker/worker/dataset.py +24 -18
- arkindex_worker/worker/element.py +100 -19
- arkindex_worker/worker/entity.py +35 -4
- arkindex_worker/worker/metadata.py +21 -11
- arkindex_worker/worker/training.py +13 -0
- arkindex_worker/worker/transcription.py +45 -5
- arkindex_worker/worker/version.py +22 -0
- hooks/pre_gen_project.py +3 -0
- tests/conftest.py +16 -8
- tests/test_base_worker.py +0 -6
- tests/test_dataset_worker.py +291 -409
- tests/test_elements_worker/test_classifications.py +365 -539
- tests/test_elements_worker/test_cli.py +1 -1
- tests/test_elements_worker/test_dataset.py +97 -116
- tests/test_elements_worker/test_elements.py +354 -76
- tests/test_elements_worker/test_entities.py +22 -2
- tests/test_elements_worker/test_metadata.py +53 -27
- tests/test_elements_worker/test_training.py +35 -0
- tests/test_elements_worker/test_transcriptions.py +149 -16
- tests/test_elements_worker/test_worker.py +19 -6
- tests/test_image.py +37 -0
- tests/test_utils.py +23 -1
- worker-demo/tests/__init__.py +0 -0
- worker-demo/tests/conftest.py +32 -0
- worker-demo/tests/test_worker.py +12 -0
- worker-demo/worker_demo/__init__.py +6 -0
- worker-demo/worker_demo/worker.py +19 -0
- arkindex_base_worker-0.3.6rc4.dist-info/METADATA +0 -47
- arkindex_base_worker-0.3.6rc4.dist-info/RECORD +0 -40
arkindex_worker/worker/__init__.py
CHANGED
@@ -1,31 +1,31 @@
 """
 Base classes to implement Arkindex workers.
 """
+
 import contextlib
 import json
 import os
 import sys
 import uuid
+from argparse import ArgumentTypeError
 from collections.abc import Iterable, Iterator
 from enum import Enum
-from itertools import groupby
-from operator import itemgetter
 from pathlib import Path

 from apistar.exceptions import ErrorResponse

 from arkindex_worker import logger
 from arkindex_worker.cache import CachedElement
-from arkindex_worker.models import Dataset, Element
+from arkindex_worker.models import Dataset, Element, Set
 from arkindex_worker.worker.base import BaseWorker
 from arkindex_worker.worker.classification import ClassificationMixin
 from arkindex_worker.worker.dataset import DatasetMixin, DatasetState
 from arkindex_worker.worker.element import ElementMixin
-from arkindex_worker.worker.entity import EntityMixin
+from arkindex_worker.worker.entity import EntityMixin
 from arkindex_worker.worker.metadata import MetaDataMixin, MetaType  # noqa: F401
 from arkindex_worker.worker.task import TaskMixin
 from arkindex_worker.worker.transcription import TranscriptionMixin
-from arkindex_worker.worker.version import WorkerVersionMixin
+from arkindex_worker.worker.version import WorkerVersionMixin


 class ActivityState(Enum):
@@ -159,6 +159,16 @@ class ElementsWorker(
         super().configure()
         super().configure_cache()

+        # Retrieve the model configuration
+        if self.model_configuration:
+            self.config.update(self.model_configuration)
+            logger.info("Model version configuration retrieved")
+
+        # Retrieve the user configuration
+        if self.user_configuration:
+            self.config.update(self.user_configuration)
+            logger.info("User configuration retrieved")
+
     def run(self):
         """
         Implements an Arkindex worker that goes through each element returned by
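The two `update()` calls above give the configuration a simple precedence chain: worker version defaults first, then the model version configuration, then the user configuration. A minimal sketch of the resulting behaviour, using plain dicts in place of the worker's attributes (the keys and values are made up):

config = {"batch_size": 8, "lang": "fr"}   # worker version defaults
model_configuration = {"batch_size": 16}   # from the model version
user_configuration = {"lang": "en"}        # set by the user on the process

# Same order as in configure(): model config overrides the defaults,
# then user config overrides both.
config.update(model_configuration)
config.update(user_configuration)

assert config == {"batch_size": 16, "lang": "en"}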
@@ -229,12 +239,13 @@ class ElementsWorker(
                 with contextlib.suppress(Exception):
                     self.update_activity(element.id, ActivityState.Error)

+        message = f'Ran on {count} element{"s"[:count>1]}: {count - failed} completed, {failed} failed'
         if failed:
-            logger.error(
-                f"Ran on {count} elements: {count - failed} completed, {failed} failed"
-            )
+            logger.error(message)
             if failed >= count:  # Everything failed!
                 sys.exit(1)
+        else:
+            logger.info(message)

     def process_element(self, element: Element | CachedElement):
         """
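The `"s"[:count>1]` idiom in the new message builds the plural suffix without a conditional: `bool` is an `int` subclass, so the slice evaluates to the empty string for a single element and to `"s"` otherwise. For example:

for count in (1, 2):
    # "s"[:False] == "s"[:0] == ""; "s"[:True] == "s"[:1] == "s"
    print(f'Ran on {count} element{"s"[:count > 1]}')
# Ran on 1 element
# Ran on 2 elements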
@@ -299,6 +310,21 @@ class ElementsWorker(
         return True


+def check_dataset_set(value: str) -> tuple[uuid.UUID, str]:
+    values = value.split(":")
+    if len(values) != 2:
+        raise ArgumentTypeError(
+            f"'{value}' is not in the correct format `<dataset_id>:<set_name>`"
+        )
+
+    dataset_id, set_name = values
+    try:
+        dataset_id = uuid.UUID(dataset_id)
+        return (dataset_id, set_name)
+    except (TypeError, ValueError) as e:
+        raise ArgumentTypeError(f"'{dataset_id}' should be a valid UUID") from e
+
+
 class MissingDatasetArchive(Exception):
     """
     Exception raised when the compressed archive associated to
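`check_dataset_set` is an argparse type callable, so malformed values surface as clean usage errors rather than tracebacks. A sketch of its behaviour, wired to a standalone parser mirroring the `--set` argument added further down (the parser itself is illustrative; only `check_dataset_set` comes from this release):

from argparse import ArgumentParser

from arkindex_worker.worker import check_dataset_set

parser = ArgumentParser()
parser.add_argument("--set", type=check_dataset_set, nargs="+", default=[])

args = parser.parse_args(["--set", "12341234-1234-1234-1234-123412341234:train"])
print(args.set)  # [(UUID('12341234-1234-1234-1234-123412341234'), 'train')]

# A missing ":" or a malformed UUID raises ArgumentTypeError, which argparse
# reports as an "invalid check_dataset_set value" usage error and exits.
parser.parse_args(["--set", "not-a-uuid:train"])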
@@ -308,7 +334,7 @@ class MissingDatasetArchive(Exception):

 class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
     """
-    Base class for ML workers that operate on Arkindex
+    Base class for ML workers that operate on Arkindex dataset sets.

     This class inherits from numerous mixin classes found in other modules of
     ``arkindex.worker``, which provide helpers to read and write to the Arkindex API.
@@ -318,24 +344,28 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
         self,
         description: str = "Arkindex Dataset Worker",
         support_cache: bool = False,
-        generator: bool = False,
     ):
         """
         :param description: The worker's description.
         :param support_cache: Whether the worker supports cache.
-        :param generator: Whether the worker generates the dataset archive artifact.
         """
         super().__init__(description, support_cache)

+        # Path to the dataset compressed archive (containing images and a SQLite database)
+        # Set as an instance variable as dataset workers might use it to easily extract its content
+        self.downloaded_dataset_artifact: Path | None = None
+
         self.parser.add_argument(
-            "--
-            type=
+            "--set",
+            type=check_dataset_set,
             nargs="+",
-            help="
+            help="""
+            One or more Arkindex dataset sets, format is <dataset_uuid>:<set_name>
+            (e.g.: "12341234-1234-1234-1234-123412341234:train")
+            """,
+            default=[],
         )

-        self.generator = generator
-
     def configure(self):
         """
         Setup the worker using CLI arguments and environment variables.
@@ -349,163 +379,130 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
         super().configure()
         super().configure_cache()

-    def download_dataset_artifact(self, dataset: Dataset) -> Path:
+        # Retrieve the model configuration
+        if self.model_configuration:
+            self.config.update(self.model_configuration)
+            logger.info("Model version configuration retrieved")
+
+        # Retrieve the user configuration
+        if self.user_configuration:
+            self.config.update(self.user_configuration)
+            logger.info("User configuration retrieved")
+
+    def cleanup_downloaded_artifact(self) -> None:
+        """
+        Cleanup the downloaded dataset artifact if any
+        """
+        if not self.downloaded_dataset_artifact:
+            return
+
+        self.downloaded_dataset_artifact.unlink(missing_ok=True)
+
+    def download_dataset_artifact(self, dataset: Dataset) -> None:
         """
         Find and download the compressed archive artifact describing a dataset using
         the [list_artifacts][arkindex_worker.worker.task.TaskMixin.list_artifacts] and
         [download_artifact][arkindex_worker.worker.task.TaskMixin.download_artifact] methods.

         :param dataset: The dataset to retrieve the compressed archive artifact for.
-        :returns: A path to the downloaded artifact.
         :raises MissingDatasetArchive: When the dataset artifact is not found.
         """
+        extra_dir = self.find_extras_directory()
+        archive = extra_dir / dataset.filepath
+        if archive.exists():
+            return

-        task_id = uuid.UUID(dataset.task_id)
+        # Cleanup the dataset artifact that was downloaded previously
+        self.cleanup_downloaded_artifact()

+        logger.info(f"Downloading artifact for {dataset}")
+        task_id = uuid.UUID(dataset.task_id)
         for artifact in self.list_artifacts(task_id):
             if artifact.path != dataset.filepath:
                 continue

-            extra_dir = self.find_extras_directory()
-            archive = extra_dir / dataset.filepath
             archive.write_bytes(self.download_artifact(task_id, artifact).read())
-            return archive
+            self.downloaded_dataset_artifact = archive
+            return

         raise MissingDatasetArchive(
             "The dataset compressed archive artifact was not found."
         )

-    def
-        self, dataset: Dataset
-    ) -> Iterator[tuple[str, list[Element]]]:
-        """
-        List the elements in the dataset, grouped by split, using the
-        [list_dataset_elements][arkindex_worker.worker.dataset.DatasetMixin.list_dataset_elements] method.
-
-        :param dataset: The dataset to retrieve elements from.
-        :returns: An iterator of tuples containing the split name and the list of its elements.
-        """
-
-        def format_split(
-            split: tuple[str, Iterator[tuple[str, Element]]],
-        ) -> tuple[str, list[Element]]:
-            return (split[0], list(map(itemgetter(1), list(split[1]))))
-
-        return map(
-            format_split,
-            groupby(
-                sorted(self.list_dataset_elements(dataset), key=itemgetter(0)),
-                key=itemgetter(0),
-            ),
-        )
-
-    def process_dataset(self, dataset: Dataset):
+    def process_set(self, set: Set):
         """
-        Override this method to implement your worker and process a single Arkindex dataset at once.
+        Override this method to implement your worker and process a single Arkindex dataset set at once.

-        :param
+        :param set: The set to process.
         """

-    def
+    def list_sets(self) -> Iterator[Set]:
         """
-        List the
-        [
+        List the sets to be processed, either from the CLI arguments or using the
+        [list_process_sets][arkindex_worker.worker.dataset.DatasetMixin.list_process_sets] method.

-        :returns: An iterator of
-        else an iterator of ``Dataset`` objects.
+        :returns: An iterator of ``Set`` objects.
         """
-        if self.is_read_only:
-
+        if not self.is_read_only:
+            yield from self.list_process_sets()
+
+        datasets: dict[uuid.UUID, Dataset] = {}
+        for dataset_id, set_name in self.args.set:
+            # Retrieving dataset information is not already cached
+            if dataset_id not in datasets:
+                datasets[dataset_id] = Dataset(
+                    **self.request("RetrieveDataset", id=dataset_id)
+                )

-
+            yield Set(name=set_name, dataset=datasets[dataset_id])

     def run(self):
         """
-        Implements an Arkindex worker that goes through each dataset returned by
-        [
+        Implements an Arkindex worker that goes through each dataset set returned by
+        [list_sets][arkindex_worker.worker.DatasetWorker.list_sets].

-        It calls [
-        catching exceptions
-        when the worker is a generator.
+        It calls [process_set][arkindex_worker.worker.DatasetWorker.process_set],
+        catching exceptions.
         """
         self.configure()

-
-        if not
-            logger.warning("No
+        dataset_sets: list[Set] = list(self.list_sets())
+        if not dataset_sets:
+            logger.warning("No sets to process, stopping.")
             sys.exit(1)

-        # Process every
-        count = len(
+        # Process every set
+        count = len(dataset_sets)
         failed = 0
-        for i,
-            dataset = None
-            dataset_artifact = None
-
+        for i, dataset_set in enumerate(dataset_sets, start=1):
             try:
-
-
-
-                else:
-                    # Load dataset using the Arkindex API
-                    dataset = Dataset(**self.request("RetrieveDataset", id=item))
-
-                if self.generator:
-                    assert (
-                        dataset.state == DatasetState.Open.value
-                    ), "When generating a new dataset, its state should be Open."
-                else:
-                    assert (
-                        dataset.state == DatasetState.Complete.value
-                    ), "When processing an existing dataset, its state should be Complete."
-
-                logger.info(f"Processing {dataset} ({i}/{count})")
-
-                if self.generator:
-                    # Update the dataset state to Building
-                    logger.info(f"Building {dataset} ({i}/{count})")
-                    self.update_dataset_state(dataset, DatasetState.Building)
-                else:
-                    logger.info(f"Downloading data for {dataset} ({i}/{count})")
-                    dataset_artifact = self.download_dataset_artifact(dataset)
+                assert (
+                    dataset_set.dataset.state == DatasetState.Complete.value
+                ), "When processing a set, its dataset state should be Complete."

-
-                self.
+                logger.info(f"Retrieving data for {dataset_set} ({i}/{count})")
+                self.download_dataset_artifact(dataset_set.dataset)

-
-
-                logger.info(f"Completed {dataset} ({i}/{count})")
-                self.update_dataset_state(dataset, DatasetState.Complete)
+                logger.info(f"Processing {dataset_set} ({i}/{count})")
+                self.process_set(dataset_set)
             except Exception as e:
-                # Handle errors occurring while retrieving
+                # Handle errors occurring while retrieving or processing this dataset set
                 failed += 1

-                # Handle the case where we failed retrieving the dataset
-                dataset_id = dataset.id if dataset else item
-
                 if isinstance(e, ErrorResponse):
-                    message = f"An API error occurred while processing
+                    message = f"An API error occurred while processing {dataset_set}: {e.title} - {e.content}"
                 else:
-                    message = (
-                        f"Failed running worker on dataset {dataset_id}: {repr(e)}"
-                    )
+                    message = f"Failed running worker on {dataset_set}: {repr(e)}"

-                logger.warning(
-
-
-
-                if dataset and self.generator:
-                    # Try to update the state to Error regardless of the response
-                    with contextlib.suppress(Exception):
-                        self.update_dataset_state(dataset, DatasetState.Error)
-            finally:
-                # Cleanup the dataset artifact if it was downloaded, no matter what
-                if dataset_artifact:
-                    dataset_artifact.unlink(missing_ok=True)
+                logger.warning(message, exc_info=e if self.args.verbose else None)
+
+        # Cleanup the latest downloaded dataset artifact
+        self.cleanup_downloaded_artifact()

+        message = f'Ran on {count} set{"s"[:count>1]}: {count - failed} completed, {failed} failed'
         if failed:
-            logger.error(
-                f"Ran on {count} datasets: {count - failed} completed, {failed} failed"
-            )
+            logger.error(message)
             if failed >= count:  # Everything failed!
                 sys.exit(1)
+        else:
+            logger.info(message)
arkindex_worker/worker/base.py
CHANGED
@@ -1,6 +1,7 @@
 """
 The base class for all Arkindex workers.
 """
+
 import argparse
 import json
 import logging
@@ -20,7 +21,6 @@ from tenacity import (
     wait_exponential,
 )

-from arkindex import ArkindexClient, options_from_env
 from arkindex_worker import logger
 from arkindex_worker.cache import (
     check_version,
@@ -30,18 +30,7 @@ from arkindex_worker.cache import (
     merge_parents_cache,
 )
 from arkindex_worker.utils import close_delete_file, extract_tar_zst_archive
-
-
-def _is_500_error(exc: Exception) -> bool:
-    """
-    Check if an Arkindex API error has a HTTP 5xx error code.
-    Used to retry most API calls in [BaseWorker][arkindex_worker.worker.base.BaseWorker].
-    :param exc: Exception to check
-    """
-    if not isinstance(exc, ErrorResponse):
-        return False
-
-    return 500 <= exc.status_code < 600
+from teklia_toolbox.requests import _get_arkindex_client, _is_500_error


 class ExtrasDirNotFoundError(Exception):
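`_is_500_error` keeps the same contract after moving to `teklia_toolbox.requests`: a plain `Exception -> bool` predicate that tenacity can use to retry only on server errors. A sketch of that wiring with a simplified stand-in predicate (the real helper also checks that the exception is an apistar `ErrorResponse`):

from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential


def _is_500_error(exc: Exception) -> bool:
    # Simplified stand-in: retry only on HTTP 5xx status codes
    return 500 <= getattr(exc, "status_code", 0) < 600


@retry(
    retry=retry_if_exception(_is_500_error),
    wait=wait_exponential(multiplier=2),
    stop=stop_after_attempt(5),
)
def flaky_api_call():
    ...  # any API request that should survive transient server errors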
@@ -72,7 +61,7 @@ class BaseWorker:
         self.parser.add_argument(
             "-c",
             "--config",
-            help="Alternative configuration file when running without a Worker
+            help="Alternative configuration file when running without a Worker Run ID",
             type=open,
         )
         self.parser.add_argument(
@@ -94,7 +83,7 @@ class BaseWorker:
             "--dev",
             help=(
                 "Run worker in developer mode. "
-                "Worker will be in read-only state even if a
+                "Worker will be in read-only state even if a worker run is supplied. "
             ),
             action="store_true",
             default=False,
@@ -148,6 +137,13 @@ class BaseWorker:
         # there is at least one available sqlite database either given or in the parent tasks
         self.use_cache = False

+        # model_version_id will be updated in configure() using the worker_run's model version
+        # or in configure_for_developers() from the environment
+        self.model_version_id = None
+        # model_details will be updated in configure() using the worker_run's model version
+        # or in configure_for_developers() from the environment
+        self.model_details = {}
+
         # task_parents will be updated in configure_cache() if the cache is supported,
         # if the task ID is set and if no database is passed as argument
         self.task_parents = []
@@ -176,12 +172,20 @@ class BaseWorker:
         """
         return self.args.dev or self.worker_run_id is None

+    @property
+    def worker_version_id(self):
+        """Deprecated property previously used to retrieve the current WorkerVersion ID.
+
+        :raises DeprecationWarning: Whenever `worker_version_id` is used.
+        """
+        raise DeprecationWarning("`worker_version_id` usage is deprecated")
+
     def setup_api_client(self):
         """
         Create an ArkindexClient to make API requests towards Arkindex instances.
         """
         # Build Arkindex API client from environment variables
-        self.api_client =
+        self.api_client = _get_arkindex_client()
         logger.debug(f"Setup Arkindex API client on {self.api_client.document.url}")

     def configure_for_developers(self):
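Since the property raises a `DeprecationWarning` (an `Exception` subclass) rather than emitting a warning, any leftover read of `worker_version_id` now fails immediately. Assuming a `worker` instance is in scope:

try:
    worker.worker_version_id
except DeprecationWarning as e:
    print(e)  # `worker_version_id` usage is deprecated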
@@ -243,25 +247,21 @@ class BaseWorker:

         # Load worker version information
         worker_version = worker_run["worker_version"]
-
-        # Store worker version id
-        self.worker_version_id = worker_version["id"]
-
         self.worker_details = worker_version["worker"]

         logger.info(f"Loaded {worker_run['summary']} from API")

         # Load model version configuration when available
         model_version = worker_run.get("model_version")
-        if model_version
+        if model_version:
             logger.info("Loaded model version configuration from WorkerRun")
-            self.model_configuration.update(model_version
+            self.model_configuration.update(model_version["configuration"])

             # Set model_version ID as worker attribute
-            self.model_version_id = model_version
+            self.model_version_id = model_version["id"]

             # Set model details as worker attribute
-            self.model_details = model_version
+            self.model_details = model_version["model"]

         # Retrieve initial configuration from API
         self.config = worker_version["configuration"].get("configuration", {})
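For reference, the shape of the WorkerRun payload this method consumes, reduced to the keys read above; the values are placeholders reconstructed from the diff, not the full API schema:

worker_run = {
    "summary": "Demo worker @ version 1",
    "worker_version": {
        "worker": {"name": "demo"},                        # -> self.worker_details
        "configuration": {"configuration": {"param": 1}},  # -> self.config
    },
    # Optional: falsy when no model version is attached to the run
    "model_version": {
        "id": "...",                                       # -> self.model_version_id
        "configuration": {"weights": "best"},              # -> self.model_configuration
        "model": {"name": "demo-model"},                   # -> self.model_details
    },
}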
arkindex_worker/worker/classification.py
CHANGED
@@ -2,8 +2,6 @@
 ElementsWorker methods for classifications and ML classes.
 """

-from uuid import UUID
-
 from apistar.exceptions import ErrorResponse
 from peewee import IntegrityError

@@ -154,13 +152,6 @@ class ClassificationMixin:
             # Detect already existing classification
             if e.status_code == 400 and "non_field_errors" in e.content:
                 if (
-                    "The fields element, worker_version, ml_class must make a unique set."
-                    in e.content["non_field_errors"]
-                ):
-                    logger.warning(
-                        f"This worker version has already set {ml_class} on element {element.id}"
-                    )
-                elif (
                     "The fields element, worker_run, ml_class must make a unique set."
                     in e.content["non_field_errors"]
                 ):
@@ -185,10 +176,14 @@ class ClassificationMixin:
         Create multiple classifications at once on the given element through the API.

         :param element: The element to create classifications on.
-        :param classifications:
-
-
-
+        :param classifications: A list of dicts representing a classification each, with the following keys:
+
+            ml_class (str)
+                Required. Name of the MLClass to use.
+            confidence (float)
+                Required. Confidence score for the classification. Must be between 0 and 1.
+            high_confidence (bool)
+                Optional. Whether or not the classification is of high confidence.

         :returns: List of created classifications, as returned in the ``classifications`` field by
             the ``CreateClassifications`` API endpoint.
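Under the new format, callers pass MLClass names directly and no longer deal with UUIDs; name-to-ID resolution happens inside the helper, as the hunks below show. A sketch of a call, assuming a `worker` instance and an `element` in scope, with illustrative class names:

created = worker.create_classifications(
    element,
    classifications=[
        {"ml_class": "handwritten", "confidence": 0.93, "high_confidence": True},
        {"ml_class": "printed", "confidence": 0.07},
    ],
)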
@@ -201,18 +196,10 @@ class ClassificationMixin:
         ), "classifications shouldn't be null and should be of type list"

         for index, classification in enumerate(classifications):
-
+            ml_class = classification.get("ml_class")
             assert (
-
-            ), f"Classification at index {index} in classifications:
-
-            # Make sure it's a valid UUID
-            try:
-                UUID(ml_class_id)
-            except ValueError as e:
-                raise ValueError(
-                    f"Classification at index {index} in classifications: ml_class_id is not a valid uuid."
-                ) from e
+                ml_class and isinstance(ml_class, str)
+            ), f"Classification at index {index} in classifications: ml_class shouldn't be null and should be of type str"

             confidence = classification.get("confidence")
             assert (
@@ -238,7 +225,13 @@ class ClassificationMixin:
             body={
                 "parent": str(element.id),
                 "worker_run_id": self.worker_run_id,
-                "classifications":
+                "classifications": [
+                    {
+                        **classification,
+                        "ml_class": self.get_ml_class_id(classification["ml_class"]),
+                    }
+                    for classification in classifications
+                ],
             },
         )["classifications"]

arkindex_worker/worker/dataset.py
CHANGED
@@ -6,7 +6,8 @@ from collections.abc import Iterator
 from enum import Enum

 from arkindex_worker import logger
-from arkindex_worker.
+from arkindex_worker.cache import unsupported_cache
+from arkindex_worker.models import Dataset, Element, Set


 class DatasetState(Enum):
@@ -36,38 +37,43 @@ class DatasetState(Enum):


 class DatasetMixin:
-    def
+    def list_process_sets(self) -> Iterator[Set]:
         """
-        List
+        List dataset sets associated to the worker's process. This helper is not available in developer mode.

-        :returns: An iterator of ``
+        :returns: An iterator of ``Set`` objects built from the ``ListProcessSets`` API endpoint.
         """
         assert not self.is_read_only, "This helper is not available in read-only mode."

         results = self.api_client.paginate(
-            "
+            "ListProcessSets", id=self.process_information["id"]
         )

-        return map(
+        return map(
+            lambda result: Set(
+                name=result["set_name"], dataset=Dataset(**result["dataset"])
+            ),
+            results,
+        )

-    def
+    def list_set_elements(self, dataset_set: Set) -> Iterator[Element]:
         """
-        List elements in a dataset.
+        List elements in a dataset set.

-        :param
-        :returns: An iterator of
+        :param dataset_set: Set to find elements in.
+        :returns: An iterator of Element built from the ``ListDatasetElements`` API endpoint.
         """
-        assert
-
-        ), "
+        assert dataset_set and isinstance(
+            dataset_set, Set
+        ), "dataset_set shouldn't be null and should be a Set"

-        results = self.api_client.paginate(
-
-
-            return (result["set"], Element(**result["element"]))
+        results = self.api_client.paginate(
+            "ListDatasetElements", id=dataset_set.dataset.id, set=dataset_set.name
+        )

-        return map(
+        return map(lambda result: Element(**result["element"]), results)

+    @unsupported_cache
     def update_dataset_state(self, dataset: Dataset, state: DatasetState) -> Dataset:
         """
         Partially updates a dataset state through the API.
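Together, the two helpers replace the old split-grouping logic: a `Set` is a named slice of a dataset, and elements are listed per set. A sketch, assuming a `worker` with API access; the dataset ID is a placeholder:

from arkindex_worker.models import Dataset, Set

dataset = Dataset(
    **worker.request("RetrieveDataset", id="12341234-1234-1234-1234-123412341234")
)
train_set = Set(name="train", dataset=dataset)

for element in worker.list_set_elements(train_set):
    print(element.id)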