PyPI - arkindex-base-worker - Versions diffs - 0.3.7rc4__py3-none-any.whl → 0.5.0a1__py3-none-any.whl - Mend

arkindex-base-worker 0.3.7rc4__py3-none-any.whl → 0.5.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

{arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/METADATA +18 -19
arkindex_base_worker-0.5.0a1.dist-info/RECORD +61 -0
{arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/WHEEL +1 -1
{arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/top_level.txt +2 -0
arkindex_worker/cache.py +1 -1
arkindex_worker/image.py +167 -2
arkindex_worker/models.py +18 -0
arkindex_worker/utils.py +98 -4
arkindex_worker/worker/__init__.py +117 -218
arkindex_worker/worker/base.py +39 -46
arkindex_worker/worker/classification.py +45 -29
arkindex_worker/worker/corpus.py +86 -0
arkindex_worker/worker/dataset.py +89 -26
arkindex_worker/worker/element.py +352 -91
arkindex_worker/worker/entity.py +13 -11
arkindex_worker/worker/image.py +21 -0
arkindex_worker/worker/metadata.py +26 -16
arkindex_worker/worker/process.py +92 -0
arkindex_worker/worker/task.py +5 -4
arkindex_worker/worker/training.py +25 -10
arkindex_worker/worker/transcription.py +89 -68
arkindex_worker/worker/version.py +3 -1
hooks/pre_gen_project.py +3 -0
tests/__init__.py +8 -0
tests/conftest.py +47 -58
tests/test_base_worker.py +212 -12
tests/test_dataset_worker.py +294 -437
tests/test_elements_worker/{test_classifications.py → test_classification.py} +313 -200
tests/test_elements_worker/test_cli.py +3 -11
tests/test_elements_worker/test_corpus.py +168 -0
tests/test_elements_worker/test_dataset.py +106 -157
tests/test_elements_worker/test_element.py +427 -0
tests/test_elements_worker/test_element_create_multiple.py +715 -0
tests/test_elements_worker/test_element_create_single.py +528 -0
tests/test_elements_worker/test_element_list_children.py +969 -0
tests/test_elements_worker/test_element_list_parents.py +530 -0
tests/test_elements_worker/{test_entities.py → test_entity_create.py} +37 -195
tests/test_elements_worker/test_entity_list_and_check.py +160 -0
tests/test_elements_worker/test_image.py +66 -0
tests/test_elements_worker/test_metadata.py +252 -161
tests/test_elements_worker/test_process.py +89 -0
tests/test_elements_worker/test_task.py +8 -18
tests/test_elements_worker/test_training.py +17 -8
tests/test_elements_worker/test_transcription_create.py +873 -0
tests/test_elements_worker/test_transcription_create_with_elements.py +951 -0
tests/test_elements_worker/test_transcription_list.py +450 -0
tests/test_elements_worker/test_version.py +60 -0
tests/test_elements_worker/test_worker.py +578 -293
tests/test_image.py +542 -209
tests/test_merge.py +1 -2
tests/test_utils.py +89 -4
worker-demo/tests/__init__.py +0 -0
worker-demo/tests/conftest.py +32 -0
worker-demo/tests/test_worker.py +12 -0
worker-demo/worker_demo/__init__.py +6 -0
worker-demo/worker_demo/worker.py +19 -0
arkindex_base_worker-0.3.7rc4.dist-info/RECORD +0 -41
tests/test_elements_worker/test_elements.py +0 -2713
tests/test_elements_worker/test_transcriptions.py +0 -2119
{arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/LICENSE +0 -0

tests/test_elements_worker/{test_classifications.py → test_classification.py} RENAMED Viewed

@@ -1,12 +1,14 @@
 import json
 import re
-from uuid import UUID, uuid4
+from uuid import UUID
 import pytest
-from apistar.exceptions import ErrorResponse
+from arkindex.exceptions import ErrorResponse
 from arkindex_worker.cache import CachedClassification, CachedElement
 from arkindex_worker.models import Element
+from arkindex_worker.utils import DEFAULT_BATCH_SIZE
+from tests import CORPUS_ID
 from . import BASE_API_CALLS
@@ -15,11 +17,96 @@ from . import BASE_API_CALLS
 DELETE_PARAMETER = "DELETE_PARAMETER"
+def test_load_corpus_classes_api_error(responses, mock_elements_worker):
+    responses.add(
+        responses.GET,
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
+        status=418,
+    )
+    assert not mock_elements_worker.classes
+    with pytest.raises(
+        Exception, match="Stopping pagination as data will be incomplete"
+    ):
+        mock_elements_worker.load_corpus_classes()
+    assert len(responses.calls) == len(BASE_API_CALLS) + 5
+    assert [
+        (call.request.method, call.request.url) for call in responses.calls
+    ] == BASE_API_CALLS + [
+        # We do 5 retries
+        (
+            "GET",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
+        ),
+        (
+            "GET",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
+        ),
+        (
+            "GET",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
+        ),
+        (
+            "GET",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
+        ),
+        (
+            "GET",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
+        ),
+    ]
+    assert not mock_elements_worker.classes
+def test_load_corpus_classes(responses, mock_elements_worker):
+    responses.add(
+        responses.GET,
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
+        status=200,
+        json={
+            "count": 3,
+            "next": None,
+            "results": [
+                {
+                    "id": "0000",
+                    "name": "good",
+                },
+                {
+                    "id": "1111",
+                    "name": "average",
+                },
+                {
+                    "id": "2222",
+                    "name": "bad",
+                },
+            ],
+        },
+    )
+    assert not mock_elements_worker.classes
+    mock_elements_worker.load_corpus_classes()
+    assert len(responses.calls) == len(BASE_API_CALLS) + 1
+    assert [
+        (call.request.method, call.request.url) for call in responses.calls
+    ] == BASE_API_CALLS + [
+        (
+            "GET",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
+        ),
+    ]
+    assert mock_elements_worker.classes == {
+        "good": "0000",
+        "average": "1111",
+        "bad": "2222",
+    }
 def test_get_ml_class_id_load_classes(responses, mock_elements_worker):
-    corpus_id = "11111111-1111-1111-1111-111111111111"
     responses.add(
         responses.GET,
-        f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         status=200,
         json={
             "count": 1,
@@ -42,7 +129,7 @@ def test_get_ml_class_id_load_classes(responses, mock_elements_worker):
     ] == BASE_API_CALLS + [
         (
             "GET",
-            f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         ),
     ]
     assert mock_elements_worker.classes == {"good": "0000"}
@@ -51,12 +138,11 @@ def test_get_ml_class_id_load_classes(responses, mock_elements_worker):
 def test_get_ml_class_id_inexistant_class(mock_elements_worker, responses):
     # A missing class is now created automatically
-    corpus_id = "11111111-1111-1111-1111-111111111111"
     mock_elements_worker.classes = {"good": "0000"}
     responses.add(
         responses.POST,
-        f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         status=201,
         json={"id": "new-ml-class-1234"},
     )
@@ -82,12 +168,10 @@ def test_get_ml_class_id(mock_elements_worker):
 def test_get_ml_class_reload(responses, mock_elements_worker):
-    corpus_id = "11111111-1111-1111-1111-111111111111"
     # Add some initial classes
     responses.add(
         responses.GET,
-        f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         json={
             "count": 1,
             "next": None,
@@ -103,7 +187,7 @@ def test_get_ml_class_reload(responses, mock_elements_worker):
     # Invalid response when trying to create class2
     responses.add(
         responses.POST,
-        f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         status=400,
         json={"non_field_errors": "Already exists"},
     )
@@ -111,7 +195,7 @@ def test_get_ml_class_reload(responses, mock_elements_worker):
     # Add both classes (class2 is created by another process)
     responses.add(
         responses.GET,
-        f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         json={
             "count": 2,
             "next": None,
@@ -141,15 +225,15 @@ def test_get_ml_class_reload(responses, mock_elements_worker):
     ] == BASE_API_CALLS + [
         (
             "GET",
-            f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         ),
         (
             "POST",
-            f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         ),
         (
             "GET",
-            f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         ),
     ]
@@ -169,7 +253,7 @@ def test_retrieve_ml_class_not_in_cache(responses, mock_elements_worker):
     """
     responses.add(
         responses.GET,
-        f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/classes/",
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         status=200,
         json={
             "count": 1,
@@ -189,7 +273,7 @@ def test_retrieve_ml_class_not_in_cache(responses, mock_elements_worker):
     ] == BASE_API_CALLS + [
         (
             "GET",
-            f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/classes/",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         ),
     ]
@@ -276,7 +360,7 @@ def test_create_classification_api_error(responses, mock_elements_worker):
     responses.add(
         responses.POST,
         "http://testserver/api/v1/classifications/",
-        status=500,
+        status=418,
     )
     with pytest.raises(ErrorResponse):
@@ -287,17 +371,10 @@ def test_create_classification_api_error(responses, mock_elements_worker):
             high_confidence=True,
         )
-    assert len(responses.calls) == len(BASE_API_CALLS) + 5
+    assert len(responses.calls) == len(BASE_API_CALLS) + 1
     assert [
         (call.request.method, call.request.url) for call in responses.calls
-    ] == BASE_API_CALLS + [
-        # We retry 5 times the API call
-        ("POST", "http://testserver/api/v1/classifications/"),
-        ("POST", "http://testserver/api/v1/classifications/"),
-        ("POST", "http://testserver/api/v1/classifications/"),
-        ("POST", "http://testserver/api/v1/classifications/"),
-        ("POST", "http://testserver/api/v1/classifications/"),
-    ]
+    ] == BASE_API_CALLS + [("POST", "http://testserver/api/v1/classifications/")]
 def test_create_classification_create_ml_class(mock_elements_worker, responses):
@@ -306,7 +383,7 @@ def test_create_classification_create_ml_class(mock_elements_worker, responses):
     # Automatically create a missing class!
     responses.add(
         responses.POST,
-        "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/classes/",
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         status=201,
         json={"id": "new-ml-class-1234"},
     )
@@ -325,15 +402,12 @@ def test_create_classification_create_ml_class(mock_elements_worker, responses):
     )
     # Check a class & classification has been created
-    for call in responses.calls:
-        print(call.request.url, call.request.body)
     assert [
         (call.request.url, json.loads(call.request.body))
         for call in responses.calls[-2:]
     ] == [
         (
-            "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/classes/",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
             {"name": "a_class"},
         ),
         (
@@ -506,12 +580,12 @@ def test_create_classifications_wrong_data(
                 "element": Element({"id": "12341234-1234-1234-1234-123412341234"}),
                 "classifications": [
                     {
-                        "ml_class_id": "uuid1",
+                        "ml_class": "cat",
                         "confidence": 0.75,
                         "high_confidence": False,
                     },
                     {
-                        "ml_class_id": "uuid2",
+                        "ml_class": "dog",
                         "confidence": 0.25,
                         "high_confidence": False,
                     },
@@ -523,86 +597,71 @@ def test_create_classifications_wrong_data(
 @pytest.mark.parametrize(
-    ("arg_name", "data", "error_message", "error_type"),
+    ("arg_name", "data", "error_message"),
     [
-        # Wrong classifications > ml_class_id
+        # Wrong classifications > ml_class
         (
-            "ml_class_id",
+            "ml_class",
             DELETE_PARAMETER,
-            "ml_class_id shouldn't be null and should be of type str",
-            AssertionError,
-        ),  # Updated
+            "ml_class shouldn't be null and should be of type str",
+        ),
         (
-            "ml_class_id",
+            "ml_class",
             None,
-            "ml_class_id shouldn't be null and should be of type str",
-            AssertionError,
+            "ml_class shouldn't be null and should be of type str",
         ),
         (
-            "ml_class_id",
+            "ml_class",
             1234,
-            "ml_class_id shouldn't be null and should be of type str",
-            AssertionError,
-        ),
-        (
-            "ml_class_id",
-            "not_an_uuid",
-            "ml_class_id is not a valid uuid.",
-            ValueError,
+            "ml_class shouldn't be null and should be of type str",
         ),
         # Wrong classifications > confidence
         (
             "confidence",
             DELETE_PARAMETER,
             "confidence shouldn't be null and should be a float in [0..1] range",
-            AssertionError,
         ),
         (
             "confidence",
             None,
             "confidence shouldn't be null and should be a float in [0..1] range",
-            AssertionError,
         ),
         (
             "confidence",
             "wrong confidence",
             "confidence shouldn't be null and should be a float in [0..1] range",
-            AssertionError,
         ),
         (
             "confidence",
             0,
             "confidence shouldn't be null and should be a float in [0..1] range",
-            AssertionError,
         ),
         (
             "confidence",
             2.00,
             "confidence shouldn't be null and should be a float in [0..1] range",
-            AssertionError,
         ),
         # Wrong classifications > high_confidence
         (
             "high_confidence",
             "wrong high_confidence",
             "high_confidence should be of type bool",
-            AssertionError,
         ),
     ],
 )
 def test_create_classifications_wrong_classifications_data(
-    arg_name, data, error_message, error_type, mock_elements_worker
+    arg_name, data, error_message, mock_elements_worker
 ):
     all_data = {
         "element": Element({"id": "12341234-1234-1234-1234-123412341234"}),
         "classifications": [
             {
-                "ml_class_id": str(uuid4()),
+                "ml_class": "cat",
                 "confidence": 0.75,
                 "high_confidence": False,
             },
             {
-                "ml_class_id": str(uuid4()),
+                "ml_class": "dog",
                 "confidence": 0.25,
                 "high_confidence": False,
                 # Overwrite with wrong data
@@ -614,7 +673,7 @@ def test_create_classifications_wrong_classifications_data(
         del all_data["classifications"][1][arg_name]
     with pytest.raises(
-        error_type,
+        AssertionError,
         match=re.escape(
             f"Classification at index 1 in classifications: {error_message}"
         ),
@@ -623,20 +682,21 @@ def test_create_classifications_wrong_classifications_data(
 def test_create_classifications_api_error(responses, mock_elements_worker):
+    mock_elements_worker.classes = {"cat": "0000", "dog": "1111"}
     responses.add(
         responses.POST,
         "http://testserver/api/v1/classification/bulk/",
-        status=500,
+        status=418,
     )
     elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
     classes = [
         {
-            "ml_class_id": str(uuid4()),
+            "ml_class": "cat",
             "confidence": 0.75,
             "high_confidence": False,
         },
         {
-            "ml_class_id": str(uuid4()),
+            "ml_class": "dog",
             "confidence": 0.25,
             "high_confidence": False,
         },
@@ -647,192 +707,245 @@ def test_create_classifications_api_error(responses, mock_elements_worker):
             element=elt, classifications=classes
         )
-    assert len(responses.calls) == len(BASE_API_CALLS) + 5
+    assert len(responses.calls) == len(BASE_API_CALLS) + 1
     assert [
         (call.request.method, call.request.url) for call in responses.calls
-    ] == BASE_API_CALLS + [
-        # We retry 5 times the API call
-        ("POST", "http://testserver/api/v1/classification/bulk/"),
-        ("POST", "http://testserver/api/v1/classification/bulk/"),
-        ("POST", "http://testserver/api/v1/classification/bulk/"),
-        ("POST", "http://testserver/api/v1/classification/bulk/"),
-        ("POST", "http://testserver/api/v1/classification/bulk/"),
-    ]
+    ] == BASE_API_CALLS + [("POST", "http://testserver/api/v1/classification/bulk/")]
-def test_create_classifications(responses, mock_elements_worker_with_cache):
-    # Set MLClass in cache
-    portrait_uuid = str(uuid4())
-    landscape_uuid = str(uuid4())
-    mock_elements_worker_with_cache.classes = {
-        "portrait": portrait_uuid,
-        "landscape": landscape_uuid,
-    }
-    elt = CachedElement.create(id="12341234-1234-1234-1234-123412341234", type="thing")
-    classes = [
-        {
-            "ml_class_id": portrait_uuid,
-            "confidence": 0.75,
-            "high_confidence": False,
-        },
-        {
-            "ml_class_id": landscape_uuid,
-            "confidence": 0.25,
-            "high_confidence": False,
-        },
-    ]
+def test_create_classifications_create_ml_class(mock_elements_worker, responses):
+    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
+    # Automatically create a missing class!
+    responses.add(
+        responses.POST,
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
+        status=201,
+        json={"id": "new-ml-class-1234"},
+    )
     responses.add(
         responses.POST,
         "http://testserver/api/v1/classification/bulk/",
-        status=200,
+        status=201,
         json={
             "parent": str(elt.id),
             "worker_run_id": "56785678-5678-5678-5678-567856785678",
             "classifications": [
                 {
                     "id": "00000000-0000-0000-0000-000000000000",
-                    "ml_class": portrait_uuid,
+                    "ml_class": "new-ml-class-1234",
                     "confidence": 0.75,
                     "high_confidence": False,
                     "state": "pending",
                 },
-                {
-                    "id": "11111111-1111-1111-1111-111111111111",
-                    "ml_class": landscape_uuid,
-                    "confidence": 0.25,
-                    "high_confidence": False,
-                    "state": "pending",
-                },
             ],
         },
     )
-    mock_elements_worker_with_cache.create_classifications(
-        element=elt, classifications=classes
+    mock_elements_worker.classes = {"another_class": "0000"}
+    mock_elements_worker.create_classifications(
+        element=elt,
+        classifications=[
+            {
+                "ml_class": "a_class",
+                "confidence": 0.75,
+                "high_confidence": False,
+            }
+        ],
     )
-    assert len(responses.calls) == len(BASE_API_CALLS) + 1
+    # Check a class & classification has been created
+    assert len(responses.calls) == len(BASE_API_CALLS) + 2
     assert [
         (call.request.method, call.request.url) for call in responses.calls
     ] == BASE_API_CALLS + [
+        (
+            "POST",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
+        ),
         ("POST", "http://testserver/api/v1/classification/bulk/"),
     ]
+    assert json.loads(responses.calls[-2].request.body) == {"name": "a_class"}
     assert json.loads(responses.calls[-1].request.body) == {
-        "parent": str(elt.id),
+        "parent": "12341234-1234-1234-1234-123412341234",
         "worker_run_id": "56785678-5678-5678-5678-567856785678",
-        "classifications": classes,
+        "classifications": [
+            {
+                "ml_class": "new-ml-class-1234",
+                "confidence": 0.75,
+                "high_confidence": False,
+            }
+        ],
     }
-    # Check that created classifications were properly stored in SQLite cache
-    assert list(CachedClassification.select()) == [
-        CachedClassification(
-            id=UUID("00000000-0000-0000-0000-000000000000"),
-            element_id=UUID(elt.id),
-            class_name="portrait",
-            confidence=0.75,
-            state="pending",
-            worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
-        ),
-        CachedClassification(
-            id=UUID("11111111-1111-1111-1111-111111111111"),
-            element_id=UUID(elt.id),
-            class_name="landscape",
-            confidence=0.25,
-            state="pending",
-            worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
-        ),
-    ]
-def test_create_classifications_not_in_cache(
-    responses, mock_elements_worker_with_cache
-):
-    """
-    CreateClassifications using ID that are not in `.classes` attribute.
-    Will load corpus MLClass to insert the corresponding name in Cache.
-    """
-    portrait_uuid = str(uuid4())
-    landscape_uuid = str(uuid4())
-    elt = CachedElement.create(id="12341234-1234-1234-1234-123412341234", type="thing")
-    classes = [
-        {
-            "ml_class_id": portrait_uuid,
-            "confidence": 0.75,
-            "high_confidence": False,
-        },
-        {
-            "ml_class_id": landscape_uuid,
-            "confidence": 0.25,
-            "high_confidence": False,
-        },
-    ]
+@pytest.mark.parametrize("batch_size", [DEFAULT_BATCH_SIZE, 1])
+def test_create_classifications(batch_size, responses, mock_elements_worker):
+    mock_elements_worker.classes = {"portrait": "0000", "landscape": "1111"}
+    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
     responses.add(
         responses.POST,
         "http://testserver/api/v1/classification/bulk/",
         status=200,
-        json={
-            "parent": str(elt.id),
-            "worker_run_id": "56785678-5678-5678-5678-567856785678",
-            "classifications": [
-                {
-                    "id": "00000000-0000-0000-0000-000000000000",
-                    "ml_class": portrait_uuid,
-                    "confidence": 0.75,
-                    "high_confidence": False,
-                    "state": "pending",
-                },
-                {
-                    "id": "11111111-1111-1111-1111-111111111111",
-                    "ml_class": landscape_uuid,
-                    "confidence": 0.25,
-                    "high_confidence": False,
-                    "state": "pending",
-                },
-            ],
-        },
+        json={"classifications": []},
     )
-    responses.add(
-        responses.GET,
-        f"http://testserver/api/v1/corpus/{mock_elements_worker_with_cache.corpus_id}/classes/",
-        status=200,
-        json={
-            "count": 2,
-            "next": None,
-            "results": [
-                {
-                    "id": portrait_uuid,
-                    "name": "portrait",
-                },
-                {"id": landscape_uuid, "name": "landscape"},
-            ],
-        },
+    mock_elements_worker.create_classifications(
+        element=elt,
+        classifications=[
+            {
+                "ml_class": "portrait",
+                "confidence": 0.75,
+                "high_confidence": False,
+            },
+            {
+                "ml_class": "landscape",
+                "confidence": 0.25,
+                "high_confidence": False,
+            },
+        ],
+        batch_size=batch_size,
     )
+    bulk_api_calls = [("POST", "http://testserver/api/v1/classification/bulk/")]
+    if batch_size != DEFAULT_BATCH_SIZE:
+        bulk_api_calls.append(("POST", "http://testserver/api/v1/classification/bulk/"))
+    assert len(responses.calls) == len(BASE_API_CALLS) + len(bulk_api_calls)
+    assert [
+        (call.request.method, call.request.url) for call in responses.calls
+    ] == BASE_API_CALLS + bulk_api_calls
+    first_cl = {"confidence": 0.75, "high_confidence": False, "ml_class": "0000"}
+    second_cl = {"confidence": 0.25, "high_confidence": False, "ml_class": "1111"}
+    empty_payload = {
+        "parent": str(elt.id),
+        "worker_run_id": "56785678-5678-5678-5678-567856785678",
+        "classifications": [],
+    }
+    bodies = []
+    first_call_idx = None
+    if batch_size > 1:
+        first_call_idx = -1
+        bodies.append({**empty_payload, "classifications": [first_cl, second_cl]})
+    else:
+        first_call_idx = -2
+        bodies.append({**empty_payload, "classifications": [first_cl]})
+        bodies.append({**empty_payload, "classifications": [second_cl]})
+    assert [
+        json.loads(bulk_call.request.body)
+        for bulk_call in responses.calls[first_call_idx:]
+    ] == bodies
+@pytest.mark.parametrize("batch_size", [DEFAULT_BATCH_SIZE, 1])
+def test_create_classifications_with_cache(
+    batch_size, responses, mock_elements_worker_with_cache
+):
+    mock_elements_worker_with_cache.classes = {"portrait": "0000", "landscape": "1111"}
+    elt = CachedElement.create(id="12341234-1234-1234-1234-123412341234", type="thing")
+    if batch_size > 1:
+        responses.add(
+            responses.POST,
+            "http://testserver/api/v1/classification/bulk/",
+            status=200,
+            json={
+                "parent": str(elt.id),
+                "worker_run_id": "56785678-5678-5678-5678-567856785678",
+                "classifications": [
+                    {
+                        "id": "00000000-0000-0000-0000-000000000000",
+                        "ml_class": "0000",
+                        "confidence": 0.75,
+                        "high_confidence": False,
+                        "state": "pending",
+                    },
+                    {
+                        "id": "11111111-1111-1111-1111-111111111111",
+                        "ml_class": "1111",
+                        "confidence": 0.25,
+                        "high_confidence": False,
+                        "state": "pending",
+                    },
+                ],
+            },
+        )
+    else:
+        for cl_id, cl_class, cl_conf in [
+            ("00000000-0000-0000-0000-000000000000", "0000", 0.75),
+            ("11111111-1111-1111-1111-111111111111", "1111", 0.25),
+        ]:
+            responses.add(
+                responses.POST,
+                "http://testserver/api/v1/classification/bulk/",
+                status=200,
+                json={
+                    "parent": str(elt.id),
+                    "worker_run_id": "56785678-5678-5678-5678-567856785678",
+                    "classifications": [
+                        {
+                            "id": cl_id,
+                            "ml_class": cl_class,
+                            "confidence": cl_conf,
+                            "high_confidence": False,
+                            "state": "pending",
+                        },
+                    ],
+                },
+            )
     mock_elements_worker_with_cache.create_classifications(
-        element=elt, classifications=classes
+        element=elt,
+        classifications=[
+            {
+                "ml_class": "portrait",
+                "confidence": 0.75,
+                "high_confidence": False,
+            },
+            {
+                "ml_class": "landscape",
+                "confidence": 0.25,
+                "high_confidence": False,
+            },
+        ],
+        batch_size=batch_size,
     )
-    assert len(responses.calls) == len(BASE_API_CALLS) + 2
+    bulk_api_calls = [("POST", "http://testserver/api/v1/classification/bulk/")]
+    if batch_size != DEFAULT_BATCH_SIZE:
+        bulk_api_calls.append(("POST", "http://testserver/api/v1/classification/bulk/"))
+    assert len(responses.calls) == len(BASE_API_CALLS) + len(bulk_api_calls)
     assert [
         (call.request.method, call.request.url) for call in responses.calls
-    ] == BASE_API_CALLS + [
-        ("POST", "http://testserver/api/v1/classification/bulk/"),
-        (
-            "GET",
-            f"http://testserver/api/v1/corpus/{mock_elements_worker_with_cache.corpus_id}/classes/",
-        ),
-    ]
+    ] == BASE_API_CALLS + bulk_api_calls
-    assert json.loads(responses.calls[-2].request.body) == {
+    first_cl = {"confidence": 0.75, "high_confidence": False, "ml_class": "0000"}
+    second_cl = {"confidence": 0.25, "high_confidence": False, "ml_class": "1111"}
+    empty_payload = {
         "parent": str(elt.id),
         "worker_run_id": "56785678-5678-5678-5678-567856785678",
-        "classifications": classes,
+        "classifications": [],
     }
+    bodies = []
+    first_call_idx = None
+    if batch_size > 1:
+        first_call_idx = -1
+        bodies.append({**empty_payload, "classifications": [first_cl, second_cl]})
+    else:
+        first_call_idx = -2
+        bodies.append({**empty_payload, "classifications": [first_cl]})
+        bodies.append({**empty_payload, "classifications": [second_cl]})
+    assert [
+        json.loads(bulk_call.request.body)
+        for bulk_call in responses.calls[first_call_idx:]
+    ] == bodies
     # Check that created classifications were properly stored in SQLite cache
     assert list(CachedClassification.select()) == [
         CachedClassification(

arkindex-base-worker 0.3.7rc4__py3-none-any.whl → 0.5.0a1__py3-none-any.whl

arkindex-base-worker 0.3.7rc4__py3-none-any.whl → 0.5.0a1__py3-none-any.whl