PyPI - arkindex-base-worker - Versions diffs - 0.4.0__py3-none-any.whl → 0.4.0a2__py3-none-any.whl - Mend

arkindex-base-worker 0.4.0__py3-none-any.whl → 0.4.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

{arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a2.dist-info}/METADATA +13 -15
arkindex_base_worker-0.4.0a2.dist-info/RECORD +51 -0
{arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a2.dist-info}/WHEEL +1 -1
arkindex_worker/cache.py +1 -1
arkindex_worker/image.py +1 -120
arkindex_worker/utils.py +0 -82
arkindex_worker/worker/__init__.py +161 -46
arkindex_worker/worker/base.py +11 -36
arkindex_worker/worker/classification.py +18 -34
arkindex_worker/worker/corpus.py +4 -21
arkindex_worker/worker/dataset.py +1 -71
arkindex_worker/worker/element.py +91 -352
arkindex_worker/worker/entity.py +11 -11
arkindex_worker/worker/metadata.py +9 -19
arkindex_worker/worker/task.py +4 -5
arkindex_worker/worker/training.py +6 -6
arkindex_worker/worker/transcription.py +68 -89
arkindex_worker/worker/version.py +1 -3
tests/__init__.py +1 -1
tests/conftest.py +45 -33
tests/test_base_worker.py +3 -204
tests/test_dataset_worker.py +4 -7
tests/test_elements_worker/{test_classification.py → test_classifications.py} +61 -194
tests/test_elements_worker/test_corpus.py +1 -32
tests/test_elements_worker/test_dataset.py +1 -1
tests/test_elements_worker/test_elements.py +2734 -0
tests/test_elements_worker/{test_entity_create.py → test_entities.py} +160 -26
tests/test_elements_worker/test_image.py +1 -2
tests/test_elements_worker/test_metadata.py +99 -224
tests/test_elements_worker/test_task.py +1 -1
tests/test_elements_worker/test_training.py +2 -2
tests/test_elements_worker/test_transcriptions.py +2102 -0
tests/test_elements_worker/test_worker.py +280 -563
tests/test_image.py +204 -429
tests/test_merge.py +2 -1
tests/test_utils.py +3 -66
arkindex_base_worker-0.4.0.dist-info/RECORD +0 -61
arkindex_worker/worker/process.py +0 -92
tests/test_elements_worker/test_element.py +0 -427
tests/test_elements_worker/test_element_create_multiple.py +0 -715
tests/test_elements_worker/test_element_create_single.py +0 -528
tests/test_elements_worker/test_element_list_children.py +0 -969
tests/test_elements_worker/test_element_list_parents.py +0 -530
tests/test_elements_worker/test_entity_list_and_check.py +0 -160
tests/test_elements_worker/test_process.py +0 -89
tests/test_elements_worker/test_transcription_create.py +0 -873
tests/test_elements_worker/test_transcription_create_with_elements.py +0 -951
tests/test_elements_worker/test_transcription_list.py +0 -450
tests/test_elements_worker/test_version.py +0 -60
{arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a2.dist-info}/LICENSE +0 -0
{arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a2.dist-info}/top_level.txt +0 -0

tests/test_elements_worker/{test_classification.py → test_classifications.py} RENAMED Viewed

@@ -3,11 +3,10 @@ import re
 from uuid import UUID
 import pytest
+from apistar.exceptions import ErrorResponse
-from arkindex.exceptions import ErrorResponse
 from arkindex_worker.cache import CachedClassification, CachedElement
 from arkindex_worker.models import Element
-from arkindex_worker.utils import DEFAULT_BATCH_SIZE
 from tests import CORPUS_ID
 from . import BASE_API_CALLS
@@ -17,92 +16,6 @@ from . import BASE_API_CALLS
 DELETE_PARAMETER = "DELETE_PARAMETER"
-def test_load_corpus_classes_api_error(responses, mock_elements_worker):
-    responses.add(
-        responses.GET,
-        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
-        status=418,
-    )
-    assert not mock_elements_worker.classes
-    with pytest.raises(
-        Exception, match="Stopping pagination as data will be incomplete"
-    ):
-        mock_elements_worker.load_corpus_classes()
-    assert len(responses.calls) == len(BASE_API_CALLS) + 5
-    assert [
-        (call.request.method, call.request.url) for call in responses.calls
-    ] == BASE_API_CALLS + [
-        # We do 5 retries
-        (
-            "GET",
-            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
-        ),
-        (
-            "GET",
-            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
-        ),
-        (
-            "GET",
-            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
-        ),
-        (
-            "GET",
-            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
-        ),
-        (
-            "GET",
-            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
-        ),
-    ]
-    assert not mock_elements_worker.classes
-def test_load_corpus_classes(responses, mock_elements_worker):
-    responses.add(
-        responses.GET,
-        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
-        status=200,
-        json={
-            "count": 3,
-            "next": None,
-            "results": [
-                {
-                    "id": "0000",
-                    "name": "good",
-                },
-                {
-                    "id": "1111",
-                    "name": "average",
-                },
-                {
-                    "id": "2222",
-                    "name": "bad",
-                },
-            ],
-        },
-    )
-    assert not mock_elements_worker.classes
-    mock_elements_worker.load_corpus_classes()
-    assert len(responses.calls) == len(BASE_API_CALLS) + 1
-    assert [
-        (call.request.method, call.request.url) for call in responses.calls
-    ] == BASE_API_CALLS + [
-        (
-            "GET",
-            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
-        ),
-    ]
-    assert mock_elements_worker.classes == {
-        "good": "0000",
-        "average": "1111",
-        "bad": "2222",
-    }
 def test_get_ml_class_id_load_classes(responses, mock_elements_worker):
     responses.add(
         responses.GET,
@@ -779,8 +692,7 @@ def test_create_classifications_create_ml_class(mock_elements_worker, responses)
     }
-@pytest.mark.parametrize("batch_size", [DEFAULT_BATCH_SIZE, 1])
-def test_create_classifications(batch_size, responses, mock_elements_worker):
+def test_create_classifications(responses, mock_elements_worker):
     mock_elements_worker.classes = {"portrait": "0000", "landscape": "1111"}
     elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
     responses.add(
@@ -804,98 +716,62 @@ def test_create_classifications(batch_size, responses, mock_elements_worker):
                 "high_confidence": False,
             },
         ],
-        batch_size=batch_size,
     )
-    bulk_api_calls = [("POST", "http://testserver/api/v1/classification/bulk/")]
-    if batch_size != DEFAULT_BATCH_SIZE:
-        bulk_api_calls.append(("POST", "http://testserver/api/v1/classification/bulk/"))
-    assert len(responses.calls) == len(BASE_API_CALLS) + len(bulk_api_calls)
+    assert len(responses.calls) == len(BASE_API_CALLS) + 1
     assert [
         (call.request.method, call.request.url) for call in responses.calls
-    ] == BASE_API_CALLS + bulk_api_calls
+    ] == BASE_API_CALLS + [
+        ("POST", "http://testserver/api/v1/classification/bulk/"),
+    ]
-    first_cl = {"confidence": 0.75, "high_confidence": False, "ml_class": "0000"}
-    second_cl = {"confidence": 0.25, "high_confidence": False, "ml_class": "1111"}
-    empty_payload = {
+    assert json.loads(responses.calls[-1].request.body) == {
         "parent": str(elt.id),
         "worker_run_id": "56785678-5678-5678-5678-567856785678",
-        "classifications": [],
+        "classifications": [
+            {
+                "confidence": 0.75,
+                "high_confidence": False,
+                "ml_class": "0000",
+            },
+            {
+                "confidence": 0.25,
+                "high_confidence": False,
+                "ml_class": "1111",
+            },
+        ],
     }
-    bodies = []
-    first_call_idx = None
-    if batch_size > 1:
-        first_call_idx = -1
-        bodies.append({**empty_payload, "classifications": [first_cl, second_cl]})
-    else:
-        first_call_idx = -2
-        bodies.append({**empty_payload, "classifications": [first_cl]})
-        bodies.append({**empty_payload, "classifications": [second_cl]})
-    assert [
-        json.loads(bulk_call.request.body)
-        for bulk_call in responses.calls[first_call_idx:]
-    ] == bodies
-@pytest.mark.parametrize("batch_size", [DEFAULT_BATCH_SIZE, 1])
-def test_create_classifications_with_cache(
-    batch_size, responses, mock_elements_worker_with_cache
-):
+def test_create_classifications_with_cache(responses, mock_elements_worker_with_cache):
     mock_elements_worker_with_cache.classes = {"portrait": "0000", "landscape": "1111"}
     elt = CachedElement.create(id="12341234-1234-1234-1234-123412341234", type="thing")
-    if batch_size > 1:
-        responses.add(
-            responses.POST,
-            "http://testserver/api/v1/classification/bulk/",
-            status=200,
-            json={
-                "parent": str(elt.id),
-                "worker_run_id": "56785678-5678-5678-5678-567856785678",
-                "classifications": [
-                    {
-                        "id": "00000000-0000-0000-0000-000000000000",
-                        "ml_class": "0000",
-                        "confidence": 0.75,
-                        "high_confidence": False,
-                        "state": "pending",
-                    },
-                    {
-                        "id": "11111111-1111-1111-1111-111111111111",
-                        "ml_class": "1111",
-                        "confidence": 0.25,
-                        "high_confidence": False,
-                        "state": "pending",
-                    },
-                ],
-            },
-        )
-    else:
-        for cl_id, cl_class, cl_conf in [
-            ("00000000-0000-0000-0000-000000000000", "0000", 0.75),
-            ("11111111-1111-1111-1111-111111111111", "1111", 0.25),
-        ]:
-            responses.add(
-                responses.POST,
-                "http://testserver/api/v1/classification/bulk/",
-                status=200,
-                json={
-                    "parent": str(elt.id),
-                    "worker_run_id": "56785678-5678-5678-5678-567856785678",
-                    "classifications": [
-                        {
-                            "id": cl_id,
-                            "ml_class": cl_class,
-                            "confidence": cl_conf,
-                            "high_confidence": False,
-                            "state": "pending",
-                        },
-                    ],
+    responses.add(
+        responses.POST,
+        "http://testserver/api/v1/classification/bulk/",
+        status=200,
+        json={
+            "parent": str(elt.id),
+            "worker_run_id": "56785678-5678-5678-5678-567856785678",
+            "classifications": [
+                {
+                    "id": "00000000-0000-0000-0000-000000000000",
+                    "ml_class": "0000",
+                    "confidence": 0.75,
+                    "high_confidence": False,
+                    "state": "pending",
+                },
+                {
+                    "id": "11111111-1111-1111-1111-111111111111",
+                    "ml_class": "1111",
+                    "confidence": 0.25,
+                    "high_confidence": False,
+                    "state": "pending",
                 },
-            )
+            ],
+        },
+    )
     mock_elements_worker_with_cache.create_classifications(
         element=elt,
@@ -911,41 +787,32 @@ def test_create_classifications_with_cache(
                 "high_confidence": False,
             },
         ],
-        batch_size=batch_size,
     )
-    bulk_api_calls = [("POST", "http://testserver/api/v1/classification/bulk/")]
-    if batch_size != DEFAULT_BATCH_SIZE:
-        bulk_api_calls.append(("POST", "http://testserver/api/v1/classification/bulk/"))
-    assert len(responses.calls) == len(BASE_API_CALLS) + len(bulk_api_calls)
+    assert len(responses.calls) == len(BASE_API_CALLS) + 1
     assert [
         (call.request.method, call.request.url) for call in responses.calls
-    ] == BASE_API_CALLS + bulk_api_calls
+    ] == BASE_API_CALLS + [
+        ("POST", "http://testserver/api/v1/classification/bulk/"),
+    ]
-    first_cl = {"confidence": 0.75, "high_confidence": False, "ml_class": "0000"}
-    second_cl = {"confidence": 0.25, "high_confidence": False, "ml_class": "1111"}
-    empty_payload = {
+    assert json.loads(responses.calls[-1].request.body) == {
         "parent": str(elt.id),
         "worker_run_id": "56785678-5678-5678-5678-567856785678",
-        "classifications": [],
+        "classifications": [
+            {
+                "confidence": 0.75,
+                "high_confidence": False,
+                "ml_class": "0000",
+            },
+            {
+                "confidence": 0.25,
+                "high_confidence": False,
+                "ml_class": "1111",
+            },
+        ],
     }
-    bodies = []
-    first_call_idx = None
-    if batch_size > 1:
-        first_call_idx = -1
-        bodies.append({**empty_payload, "classifications": [first_cl, second_cl]})
-    else:
-        first_call_idx = -2
-        bodies.append({**empty_payload, "classifications": [first_cl]})
-        bodies.append({**empty_payload, "classifications": [second_cl]})
-    assert [
-        json.loads(bulk_call.request.body)
-        for bulk_call in responses.calls[first_call_idx:]
-    ] == bodies
     # Check that created classifications were properly stored in SQLite cache
     assert list(CachedClassification.select()) == [
         CachedClassification(

tests/test_elements_worker/test_corpus.py CHANGED Viewed

@@ -2,44 +2,13 @@ import re
 import uuid
 import pytest
+from apistar.exceptions import ErrorResponse
-from arkindex.exceptions import ErrorResponse
 from arkindex_worker.worker.corpus import CorpusExportState
 from tests import CORPUS_ID
 from tests.test_elements_worker import BASE_API_CALLS
-def test_download_export_not_a_uuid(responses, mock_elements_worker):
-    with pytest.raises(ValueError, match="export_id is not a valid uuid."):
-        mock_elements_worker.download_export("mon export")
-def test_download_export(responses, mock_elements_worker):
-    responses.add(
-        responses.GET,
-        "http://testserver/api/v1/export/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff/",
-        status=302,
-        body=b"some SQLite export",
-        content_type="application/x-sqlite3",
-        stream=True,
-    )
-    export = mock_elements_worker.download_export(
-        "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"
-    )
-    assert export.name == "/tmp/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"
-    assert len(responses.calls) == len(BASE_API_CALLS) + 1
-    assert [
-        (call.request.method, call.request.url) for call in responses.calls
-    ] == BASE_API_CALLS + [
-        (
-            "GET",
-            "http://testserver/api/v1/export/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff/",
-        ),
-    ]
 def mock_list_exports_call(responses, export_id):
     responses.add(
         responses.GET,

tests/test_elements_worker/test_dataset.py CHANGED Viewed

@@ -2,8 +2,8 @@ import json
 import logging
 import pytest
+from apistar.exceptions import ErrorResponse
-from arkindex.exceptions import ErrorResponse
 from arkindex_worker.models import Dataset, Element, Set
 from arkindex_worker.worker.dataset import DatasetState
 from tests import PROCESS_ID

arkindex-base-worker 0.4.0__py3-none-any.whl → 0.4.0a2__py3-none-any.whl

arkindex-base-worker 0.4.0__py3-none-any.whl → 0.4.0a2__py3-none-any.whl