PyPI - arkindex-base-worker - Versions diffs - 0.4.0rc4__tar.gz → 0.4.0rc6__tar.gz - Mend

arkindex-base-worker 0.4.0rc4.tar.gz → 0.4.0rc6.tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (67) hide show

{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: arkindex-base-worker
-Version: 0.4.0rc4
+Version: 0.4.0rc6
 Summary: Base Worker to easily build Arkindex ML workflows
 Author-email: Teklia <contact@teklia.com>
 Maintainer-email: Teklia <contact@teklia.com>
@@ -41,19 +41,19 @@ Classifier: Programming Language :: Python :: 3.12
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: humanize==4.10.0
+Requires-Dist: humanize==4.11.0
 Requires-Dist: peewee~=3.17
-Requires-Dist: Pillow==10.4.0
-Requires-Dist: python-gnupg==0.5.2
+Requires-Dist: Pillow==11.0.0
+Requires-Dist: python-gnupg==0.5.3
 Requires-Dist: shapely==2.0.6
 Requires-Dist: teklia-toolbox==0.1.7b1
-Requires-Dist: zstandard==0.22.0
+Requires-Dist: zstandard==0.23.0
 Provides-Extra: docs
-Requires-Dist: black==24.4.2; extra == "docs"
-Requires-Dist: mkdocs-material==9.5.33; extra == "docs"
-Requires-Dist: mkdocstrings-python==1.11.1; extra == "docs"
+Requires-Dist: black==24.10.0; extra == "docs"
+Requires-Dist: mkdocs-material==9.5.48; extra == "docs"
+Requires-Dist: mkdocstrings-python==1.12.2; extra == "docs"
 Provides-Extra: tests
-Requires-Dist: pytest==8.3.2; extra == "tests"
+Requires-Dist: pytest==8.3.4; extra == "tests"
 Requires-Dist: pytest-mock==3.14.0; extra == "tests"
 Requires-Dist: pytest-responses==0.5.1; extra == "tests"

{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_base_worker.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: arkindex-base-worker
-Version: 0.4.0rc4
+Version: 0.4.0rc6
 Summary: Base Worker to easily build Arkindex ML workflows
 Author-email: Teklia <contact@teklia.com>
 Maintainer-email: Teklia <contact@teklia.com>
@@ -41,19 +41,19 @@ Classifier: Programming Language :: Python :: 3.12
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: humanize==4.10.0
+Requires-Dist: humanize==4.11.0
 Requires-Dist: peewee~=3.17
-Requires-Dist: Pillow==10.4.0
-Requires-Dist: python-gnupg==0.5.2
+Requires-Dist: Pillow==11.0.0
+Requires-Dist: python-gnupg==0.5.3
 Requires-Dist: shapely==2.0.6
 Requires-Dist: teklia-toolbox==0.1.7b1
-Requires-Dist: zstandard==0.22.0
+Requires-Dist: zstandard==0.23.0
 Provides-Extra: docs
-Requires-Dist: black==24.4.2; extra == "docs"
-Requires-Dist: mkdocs-material==9.5.33; extra == "docs"
-Requires-Dist: mkdocstrings-python==1.11.1; extra == "docs"
+Requires-Dist: black==24.10.0; extra == "docs"
+Requires-Dist: mkdocs-material==9.5.48; extra == "docs"
+Requires-Dist: mkdocstrings-python==1.12.2; extra == "docs"
 Provides-Extra: tests
-Requires-Dist: pytest==8.3.2; extra == "tests"
+Requires-Dist: pytest==8.3.4; extra == "tests"
 Requires-Dist: pytest-mock==3.14.0; extra == "tests"
 Requires-Dist: pytest-responses==0.5.1; extra == "tests"

{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_base_worker.egg-info/SOURCES.txt RENAMED Viewed

@@ -49,6 +49,7 @@ tests/test_elements_worker/test_entity_create.py
 tests/test_elements_worker/test_entity_list_and_check.py
 tests/test_elements_worker/test_image.py
 tests/test_elements_worker/test_metadata.py
+tests/test_elements_worker/test_process.py
 tests/test_elements_worker/test_task.py
 tests/test_elements_worker/test_training.py
 tests/test_elements_worker/test_transcription_create.py

arkindex_base_worker-0.4.0rc6/arkindex_base_worker.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,17 @@
+humanize==4.11.0
+peewee~=3.17
+Pillow==11.0.0
+python-gnupg==0.5.3
+shapely==2.0.6
+teklia-toolbox==0.1.7b1
+zstandard==0.23.0
+[docs]
+black==24.10.0
+mkdocs-material==9.5.48
+mkdocstrings-python==1.12.2
+[tests]
+pytest==8.3.4
+pytest-mock==3.14.0
+pytest-responses==0.5.1

{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/__init__.py RENAMED Viewed

@@ -27,7 +27,7 @@ from arkindex_worker.worker.element import ElementMixin
 from arkindex_worker.worker.entity import EntityMixin
 from arkindex_worker.worker.image import ImageMixin
 from arkindex_worker.worker.metadata import MetaDataMixin, MetaType  # noqa: F401
-from arkindex_worker.worker.process import ActivityState, ProcessMode
+from arkindex_worker.worker.process import ActivityState, ProcessMixin, ProcessMode
 from arkindex_worker.worker.task import TaskMixin
 from arkindex_worker.worker.transcription import TranscriptionMixin
 from arkindex_worker.worker.version import WorkerVersionMixin
@@ -44,6 +44,7 @@ class ElementsWorker(
     EntityMixin,
     MetaDataMixin,
     ImageMixin,
+    ProcessMixin,
 ):
     """
     Base class for ML workers that operate on Arkindex elements.
@@ -119,6 +120,9 @@ class ElementsWorker(
             return list(
                 chain.from_iterable(map(self.list_set_elements, self.list_sets()))
             )
+        elif self.process_mode == ProcessMode.Export:
+            # For export mode processes, use list_process_elements and return element IDs
+            return {item["id"] for item in self.list_process_elements()}
         invalid_element_ids = list(filter(invalid_element_id, out))
         assert (
@@ -133,8 +137,12 @@ class ElementsWorker(
         Whether or not WorkerActivity support has been enabled on the DataImport
         used to run this worker.
         """
-        if self.is_read_only or self.process_mode == ProcessMode.Dataset:
-            # Worker activities are also disabled when running an ElementsWorker in a Dataset process.
+        if self.is_read_only or self.process_mode in [
+            ProcessMode.Dataset,
+            ProcessMode.Export,
+        ]:
+            # Worker activities are also disabled when running an ElementsWorker in a Dataset process
+            # and when running export processes.
             return False
         assert (
             self.process_information

{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/entity.py RENAMED Viewed

@@ -16,9 +16,6 @@ from arkindex_worker.cache import (
 )
 from arkindex_worker.models import Element, Transcription
 from arkindex_worker.utils import (
-    DEFAULT_BATCH_SIZE,
-    batch_publication,
-    make_batches,
     pluralize,
 )
@@ -219,12 +216,10 @@ class EntityMixin:
         return transcription_ent
     @unsupported_cache
-    @batch_publication
     def create_transcription_entities(
         self,
         transcription: Transcription,
         entities: list[Entity],
-        batch_size: int = DEFAULT_BATCH_SIZE,
     ) -> list[dict[str, str]]:
         """
         Create multiple entities attached to a transcription in a single API request.
@@ -247,8 +242,6 @@ class EntityMixin:
             confidence (float or None)
                 Optional confidence score, between 0.0 and 1.0.
-        :param batch_size: The size of each batch, which will be used to split the publication to avoid API errors.
         :return: List of dicts, with each dict having a two keys, `transcription_entity_id` and `entity_id`, holding the UUID of each created object.
         """
         assert transcription and isinstance(
@@ -300,18 +293,14 @@ class EntityMixin:
             )
             return
-        created_entities = [
-            created_entity
-            for batch in make_batches(entities, "entity", batch_size)
-            for created_entity in self.api_client.request(
-                "CreateTranscriptionEntities",
-                id=transcription.id,
-                body={
-                    "worker_run_id": self.worker_run_id,
-                    "entities": batch,
-                },
-            )["entities"]
-        ]
+        created_entities = self.api_client.request(
+            "CreateTranscriptionEntities",
+            id=transcription.id,
+            body={
+                "worker_run_id": self.worker_run_id,
+                "entities": entities,
+            },
+        )["entities"]
         return created_entities

{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/process.py RENAMED Viewed

@@ -1,5 +1,11 @@
+from collections.abc import Iterator
 from enum import Enum
+from arkindex_worker.cache import unsupported_cache
+# Increases the number of elements returned per page by the API
+PROCESS_ELEMENTS_PAGE_SIZE = 500
 class ActivityState(Enum):
     """
@@ -61,3 +67,26 @@ class ProcessMode(Enum):
     """
     Dataset processes.
     """
+    Export = "export"
+    """
+    Export processes.
+    """
+class ProcessMixin:
+    @unsupported_cache
+    def list_process_elements(self, with_image: bool = False) -> Iterator[dict]:
+        """
+        List the elements of a process.
+        :param with_image: whether or not to include zone and image information in the elements response.
+        :returns: the process' elements.
+        """
+        return self.api_client.paginate(
+            "ListProcessElements",
+            id=self.process_information["id"],
+            with_image=with_image,
+            allow_missing_data=True,
+            page_size=PROCESS_ELEMENTS_PAGE_SIZE,
+        )

{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/pyproject.toml RENAMED Viewed

@@ -4,17 +4,17 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "arkindex-base-worker"
-version = "0.4.0rc4"
+version = "0.4.0rc6"
 description = "Base Worker to easily build Arkindex ML workflows"
 license = { file = "LICENSE" }
 dependencies = [
-    "humanize==4.10.0",
+    "humanize==4.11.0",
     "peewee~=3.17",
-    "Pillow==10.4.0",
-    "python-gnupg==0.5.2",
+    "Pillow==11.0.0",
+    "python-gnupg==0.5.3",
     "shapely==2.0.6",
     "teklia-toolbox==0.1.7b1",
-    "zstandard==0.22.0",
+    "zstandard==0.23.0",
 ]
 authors = [
     { name = "Teklia", email = "contact@teklia.com" },
@@ -44,12 +44,12 @@ Authors = "https://teklia.com"
 [project.optional-dependencies]
 docs = [
-    "black==24.4.2",
-    "mkdocs-material==9.5.33",
-    "mkdocstrings-python==1.11.1",
+    "black==24.10.0",
+    "mkdocs-material==9.5.48",
+    "mkdocstrings-python==1.12.2",
 ]
 tests = [
-    "pytest==8.3.2",
+    "pytest==8.3.4",
     "pytest-mock==3.14.0",
     "pytest-responses==0.5.1",
 ]

{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_entity_create.py RENAMED Viewed

@@ -13,7 +13,6 @@ from arkindex_worker.cache import (
     CachedTranscriptionEntity,
 )
 from arkindex_worker.models import Transcription
-from arkindex_worker.utils import DEFAULT_BATCH_SIZE
 from arkindex_worker.worker.transcription import TextOrientation
 from tests import CORPUS_ID
@@ -836,89 +835,50 @@ def test_create_transcription_entities_wrong_entity(
         )
-@pytest.mark.parametrize("batch_size", [DEFAULT_BATCH_SIZE, 1])
-def test_create_transcription_entities(batch_size, responses, mock_elements_worker):
+def test_create_transcription_entities(responses, mock_elements_worker):
     transcription = Transcription(id="transcription-id")
     # Call to Transcription entities creation in bulk
-    if batch_size > 1:
-        responses.add(
-            responses.POST,
-            "http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
-            status=201,
-            match=[
-                matchers.json_params_matcher(
-                    {
-                        "worker_run_id": "56785678-5678-5678-5678-567856785678",
-                        "entities": [
-                            {
-                                "name": "Teklia",
-                                "type_id": "22222222-2222-2222-2222-222222222222",
-                                "offset": 0,
-                                "length": 6,
-                                "confidence": 1.0,
-                            },
-                            {
-                                "name": "Team Rocket",
-                                "type_id": "22222222-2222-2222-2222-222222222222",
-                                "offset": 7,
-                                "length": 11,
-                                "confidence": 1.0,
-                            },
-                        ],
-                    }
-                )
-            ],
-            json={
-                "entities": [
-                    {
-                        "transcription_entity_id": "transc-entity-id",
-                        "entity_id": "entity-id1",
-                    },
-                    {
-                        "transcription_entity_id": "transc-entity-id",
-                        "entity_id": "entity-id2",
-                    },
-                ]
-            },
-        )
-    else:
-        for idx, (name, offset, length) in enumerate(
-            [
-                ("Teklia", 0, 6),
-                ("Team Rocket", 7, 11),
-            ],
-            start=1,
-        ):
-            responses.add(
-                responses.POST,
-                "http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
-                status=201,
-                match=[
-                    matchers.json_params_matcher(
-                        {
-                            "worker_run_id": "56785678-5678-5678-5678-567856785678",
-                            "entities": [
-                                {
-                                    "name": name,
-                                    "type_id": "22222222-2222-2222-2222-222222222222",
-                                    "offset": offset,
-                                    "length": length,
-                                    "confidence": 1.0,
-                                }
-                            ],
-                        }
-                    )
-                ],
-                json={
+    responses.add(
+        responses.POST,
+        "http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
+        status=201,
+        match=[
+            matchers.json_params_matcher(
+                {
+                    "worker_run_id": "56785678-5678-5678-5678-567856785678",
                     "entities": [
                         {
-                            "transcription_entity_id": "transc-entity-id",
-                            "entity_id": f"entity-id{idx}",
-                        }
-                    ]
-                },
+                            "name": "Teklia",
+                            "type_id": "22222222-2222-2222-2222-222222222222",
+                            "offset": 0,
+                            "length": 6,
+                            "confidence": 1.0,
+                        },
+                        {
+                            "name": "Team Rocket",
+                            "type_id": "22222222-2222-2222-2222-222222222222",
+                            "offset": 7,
+                            "length": 11,
+                            "confidence": 1.0,
+                        },
+                    ],
+                }
             )
+        ],
+        json={
+            "entities": [
+                {
+                    "transcription_entity_id": "transc-entity-id",
+                    "entity_id": "entity-id1",
+                },
+                {
+                    "transcription_entity_id": "transc-entity-id",
+                    "entity_id": "entity-id2",
+                },
+            ]
+        },
+    )
     # Store entity type/slug correspondence on the worker
     mock_elements_worker.entity_types = {
@@ -942,26 +902,16 @@ def test_create_transcription_entities(batch_size, responses, mock_elements_work
                 "confidence": 1.0,
             },
         ],
-        batch_size=batch_size,
     )
     assert len(created_objects) == 2
-    bulk_api_calls = [
+    assert len(responses.calls) == len(BASE_API_CALLS) + 1
+    assert [
+        (call.request.method, call.request.url) for call in responses.calls
+    ] == BASE_API_CALLS + [
         (
             "POST",
             "http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
         )
     ]
-    if batch_size != DEFAULT_BATCH_SIZE:
-        bulk_api_calls.append(
-            (
-                "POST",
-                "http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
-            )
-        )
-    assert len(responses.calls) == len(BASE_API_CALLS) + len(bulk_api_calls)
-    assert [
-        (call.request.method, call.request.url) for call in responses.calls
-    ] == BASE_API_CALLS + bulk_api_calls

arkindex_base_worker-0.4.0rc6/tests/test_elements_worker/test_process.py ADDED Viewed

@@ -0,0 +1,89 @@
+import pytest
+from tests import PROCESS_ID
+@pytest.mark.parametrize(
+    ("with_image", "elements"),
+    [
+        (
+            False,
+            [
+                {
+                    "id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
+                    "type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
+                    "name": "element 1",
+                    "confidence": 1,
+                    "image_id": None,
+                    "image_width": None,
+                    "image_height": None,
+                    "image_url": None,
+                    "polygon": None,
+                    "rotation_angle": 0,
+                    "mirrored": False,
+                },
+                {
+                    "id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0",
+                    "type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
+                    "name": "element 2",
+                    "confidence": 1,
+                    "image_id": None,
+                    "image_width": None,
+                    "image_height": None,
+                    "image_url": None,
+                    "polygon": None,
+                    "rotation_angle": 0,
+                    "mirrored": False,
+                },
+            ],
+        ),
+        (
+            True,
+            [
+                {
+                    "id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
+                    "type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
+                    "name": "element 1",
+                    "confidence": 1,
+                    "image_id": "aaa2aaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
+                    "image_width": 76,
+                    "image_height": 138,
+                    "image_url": "http://somewhere.com/iiif/image.jpeg",
+                    "polygon": [[0, 0], [0, 40], [20, 40], [20, 0]],
+                    "rotation_angle": 0,
+                    "mirrored": False,
+                },
+                {
+                    "id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0",
+                    "type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
+                    "name": "element 2",
+                    "confidence": 1,
+                    "image_id": "aaa2aaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
+                    "image_width": 138,
+                    "image_height": 76,
+                    "image_url": "http://somewhere.com/iiif/image.jpeg",
+                    "polygon": [[0, 0], [0, 40], [20, 40], [20, 0]],
+                    "rotation_angle": 0,
+                    "mirrored": False,
+                },
+            ],
+        ),
+    ],
+)
+def test_list_process_elements_with_image(
+    responses, mock_elements_worker, with_image, elements
+):
+    responses.add(
+        responses.GET,
+        f"http://testserver/api/v1/process/{PROCESS_ID}/elements/?page_size=500&with_count=true&with_image={with_image}",
+        status=200,
+        json={
+            "count": 2,
+            "next": None,
+            "results": elements,
+        },
+    )
+    assert (
+        list(mock_elements_worker.list_process_elements(with_image=with_image))
+        == elements
+    )

{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_worker.py RENAMED Viewed

@@ -16,6 +16,7 @@ from arkindex_worker.models import Element
 from arkindex_worker.worker import ActivityState, ElementsWorker
 from arkindex_worker.worker.dataset import DatasetState
 from arkindex_worker.worker.process import ProcessMode
+from tests import PROCESS_ID
 from . import BASE_API_CALLS
@@ -523,6 +524,51 @@ def test_get_elements_both_args_error(mocker, mock_elements_worker, tmp_path):
         worker.get_elements()
+def test_get_elements_export_process(mock_elements_worker, responses):
+    responses.add(
+        responses.GET,
+        f"http://testserver/api/v1/process/{PROCESS_ID}/elements/?page_size=500&with_count=true&with_image=False",
+        status=200,
+        json={
+            "count": 2,
+            "next": None,
+            "results": [
+                {
+                    "id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
+                    "type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
+                    "name": "element 1",
+                    "confidence": 1,
+                    "image_id": None,
+                    "image_width": None,
+                    "image_height": None,
+                    "image_url": None,
+                    "polygon": None,
+                    "rotation_angle": 0,
+                    "mirrored": False,
+                },
+                {
+                    "id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0",
+                    "type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
+                    "name": "element 2",
+                    "confidence": 1,
+                    "image_id": None,
+                    "image_width": None,
+                    "image_height": None,
+                    "image_url": None,
+                    "polygon": None,
+                    "rotation_angle": 0,
+                    "mirrored": False,
+                },
+            ],
+        },
+    )
+    mock_elements_worker.process_information["mode"] = "export"
+    assert set(mock_elements_worker.get_elements()) == {
+        "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
+        "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0",
+    }
 @pytest.mark.usefixtures("_mock_worker_run_api")
 def test_activities_disabled(responses, monkeypatch):
     """Test worker process elements without updating activities when they are disabled for the process"""

arkindex_base_worker-0.4.0rc4/arkindex_base_worker.egg-info/requires.txt DELETED Viewed

@@ -1,17 +0,0 @@
-humanize==4.10.0
-peewee~=3.17
-Pillow==10.4.0
-python-gnupg==0.5.2
-shapely==2.0.6
-teklia-toolbox==0.1.7b1
-zstandard==0.22.0
-[docs]
-black==24.4.2
-mkdocs-material==9.5.33
-mkdocstrings-python==1.11.1
-[tests]
-pytest==8.3.2
-pytest-mock==3.14.0
-pytest-responses==0.5.1