PyPI - arkindex-base-worker - Versions diffs - 0.3.7rc10__py3-none-any.whl → 0.4.0a1__py3-none-any.whl - Mend

arkindex-base-worker 0.3.7rc10__py3-none-any.whl → 0.4.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

{arkindex_base_worker-0.3.7rc10.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/METADATA +10 -16
arkindex_base_worker-0.4.0a1.dist-info/RECORD +51 -0
arkindex_worker/models.py +6 -0
arkindex_worker/utils.py +3 -4
arkindex_worker/worker/__init__.py +23 -2
arkindex_worker/worker/base.py +3 -23
arkindex_worker/worker/corpus.py +69 -0
arkindex_worker/worker/image.py +21 -0
arkindex_worker/worker/training.py +12 -0
tests/__init__.py +8 -0
tests/conftest.py +4 -8
tests/test_base_worker.py +8 -9
tests/test_dataset_worker.py +14 -41
tests/test_elements_worker/test_classifications.py +22 -39
tests/test_elements_worker/test_cli.py +3 -11
tests/test_elements_worker/test_corpus.py +137 -0
tests/test_elements_worker/test_dataset.py +6 -11
tests/test_elements_worker/test_elements.py +106 -85
tests/test_elements_worker/test_entities.py +15 -39
tests/test_elements_worker/test_image.py +65 -0
tests/test_elements_worker/test_metadata.py +6 -40
tests/test_elements_worker/test_task.py +7 -17
tests/test_elements_worker/test_training.py +35 -0
tests/test_elements_worker/test_transcriptions.py +10 -27
tests/test_elements_worker/test_worker.py +2 -1
tests/test_image.py +3 -5
arkindex_base_worker-0.3.7rc10.dist-info/RECORD +0 -47
{arkindex_base_worker-0.3.7rc10.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/LICENSE +0 -0
{arkindex_base_worker-0.3.7rc10.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/WHEEL +0 -0
{arkindex_base_worker-0.3.7rc10.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/top_level.txt +0 -0

tests/test_elements_worker/test_classifications.py CHANGED Viewed

@@ -7,6 +7,7 @@ from apistar.exceptions import ErrorResponse
 from arkindex_worker.cache import CachedClassification, CachedElement
 from arkindex_worker.models import Element
+from tests import CORPUS_ID
 from . import BASE_API_CALLS
@@ -16,10 +17,9 @@ DELETE_PARAMETER = "DELETE_PARAMETER"
 def test_get_ml_class_id_load_classes(responses, mock_elements_worker):
-    corpus_id = "11111111-1111-1111-1111-111111111111"
     responses.add(
         responses.GET,
-        f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         status=200,
         json={
             "count": 1,
@@ -42,7 +42,7 @@ def test_get_ml_class_id_load_classes(responses, mock_elements_worker):
     ] == BASE_API_CALLS + [
         (
             "GET",
-            f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         ),
     ]
     assert mock_elements_worker.classes == {"good": "0000"}
@@ -51,12 +51,11 @@ def test_get_ml_class_id_load_classes(responses, mock_elements_worker):
 def test_get_ml_class_id_inexistant_class(mock_elements_worker, responses):
     # A missing class is now created automatically
-    corpus_id = "11111111-1111-1111-1111-111111111111"
     mock_elements_worker.classes = {"good": "0000"}
     responses.add(
         responses.POST,
-        f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         status=201,
         json={"id": "new-ml-class-1234"},
     )
@@ -82,12 +81,10 @@ def test_get_ml_class_id(mock_elements_worker):
 def test_get_ml_class_reload(responses, mock_elements_worker):
-    corpus_id = "11111111-1111-1111-1111-111111111111"
     # Add some initial classes
     responses.add(
         responses.GET,
-        f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         json={
             "count": 1,
             "next": None,
@@ -103,7 +100,7 @@ def test_get_ml_class_reload(responses, mock_elements_worker):
     # Invalid response when trying to create class2
     responses.add(
         responses.POST,
-        f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         status=400,
         json={"non_field_errors": "Already exists"},
     )
@@ -111,7 +108,7 @@ def test_get_ml_class_reload(responses, mock_elements_worker):
     # Add both classes (class2 is created by another process)
     responses.add(
         responses.GET,
-        f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         json={
             "count": 2,
             "next": None,
@@ -141,15 +138,15 @@ def test_get_ml_class_reload(responses, mock_elements_worker):
     ] == BASE_API_CALLS + [
         (
             "GET",
-            f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         ),
         (
             "POST",
-            f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         ),
         (
             "GET",
-            f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         ),
     ]
@@ -169,7 +166,7 @@ def test_retrieve_ml_class_not_in_cache(responses, mock_elements_worker):
     """
     responses.add(
         responses.GET,
-        f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/classes/",
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         status=200,
         json={
             "count": 1,
@@ -189,7 +186,7 @@ def test_retrieve_ml_class_not_in_cache(responses, mock_elements_worker):
     ] == BASE_API_CALLS + [
         (
             "GET",
-            f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/classes/",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         ),
     ]
@@ -276,7 +273,7 @@ def test_create_classification_api_error(responses, mock_elements_worker):
     responses.add(
         responses.POST,
         "http://testserver/api/v1/classifications/",
-        status=500,
+        status=418,
     )
     with pytest.raises(ErrorResponse):
@@ -287,17 +284,10 @@ def test_create_classification_api_error(responses, mock_elements_worker):
             high_confidence=True,
         )
-    assert len(responses.calls) == len(BASE_API_CALLS) + 5
+    assert len(responses.calls) == len(BASE_API_CALLS) + 1
     assert [
         (call.request.method, call.request.url) for call in responses.calls
-    ] == BASE_API_CALLS + [
-        # We retry 5 times the API call
-        ("POST", "http://testserver/api/v1/classifications/"),
-        ("POST", "http://testserver/api/v1/classifications/"),
-        ("POST", "http://testserver/api/v1/classifications/"),
-        ("POST", "http://testserver/api/v1/classifications/"),
-        ("POST", "http://testserver/api/v1/classifications/"),
-    ]
+    ] == BASE_API_CALLS + [("POST", "http://testserver/api/v1/classifications/")]
 def test_create_classification_create_ml_class(mock_elements_worker, responses):
@@ -306,7 +296,7 @@ def test_create_classification_create_ml_class(mock_elements_worker, responses):
     # Automatically create a missing class!
     responses.add(
         responses.POST,
-        "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/classes/",
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         status=201,
         json={"id": "new-ml-class-1234"},
     )
@@ -330,7 +320,7 @@ def test_create_classification_create_ml_class(mock_elements_worker, responses):
         for call in responses.calls[-2:]
     ] == [
         (
-            "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/classes/",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
             {"name": "a_class"},
         ),
         (
@@ -609,7 +599,7 @@ def test_create_classifications_api_error(responses, mock_elements_worker):
     responses.add(
         responses.POST,
         "http://testserver/api/v1/classification/bulk/",
-        status=500,
+        status=418,
     )
     elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
     classes = [
@@ -630,17 +620,10 @@ def test_create_classifications_api_error(responses, mock_elements_worker):
             element=elt, classifications=classes
         )
-    assert len(responses.calls) == len(BASE_API_CALLS) + 5
+    assert len(responses.calls) == len(BASE_API_CALLS) + 1
     assert [
         (call.request.method, call.request.url) for call in responses.calls
-    ] == BASE_API_CALLS + [
-        # We retry 5 times the API call
-        ("POST", "http://testserver/api/v1/classification/bulk/"),
-        ("POST", "http://testserver/api/v1/classification/bulk/"),
-        ("POST", "http://testserver/api/v1/classification/bulk/"),
-        ("POST", "http://testserver/api/v1/classification/bulk/"),
-        ("POST", "http://testserver/api/v1/classification/bulk/"),
-    ]
+    ] == BASE_API_CALLS + [("POST", "http://testserver/api/v1/classification/bulk/")]
 def test_create_classifications_create_ml_class(mock_elements_worker, responses):
@@ -649,7 +632,7 @@ def test_create_classifications_create_ml_class(mock_elements_worker, responses)
     # Automatically create a missing class!
     responses.add(
         responses.POST,
-        "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/classes/",
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         status=201,
         json={"id": "new-ml-class-1234"},
     )
@@ -690,7 +673,7 @@ def test_create_classifications_create_ml_class(mock_elements_worker, responses)
     ] == BASE_API_CALLS + [
         (
             "POST",
-            "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/classes/",
+            f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
         ),
         ("POST", "http://testserver/api/v1/classification/bulk/"),
     ]

tests/test_elements_worker/test_cli.py CHANGED Viewed

@@ -2,7 +2,6 @@ import json
 import sys
 import tempfile
 from pathlib import Path
-from uuid import UUID
 import pytest
@@ -58,13 +57,6 @@ def test_cli_arg_elements_list_given(mocker):
     path.unlink()
-def test_cli_arg_element_one_given_not_uuid(mocker):
-    mocker.patch.object(sys, "argv", ["worker", "--element", "1234"])
-    worker = ElementsWorker()
-    with pytest.raises(SystemExit):
-        worker.configure()
 @pytest.mark.usefixtures("_mock_worker_run_api")
 def test_cli_arg_element_one_given(mocker):
     mocker.patch.object(
@@ -73,7 +65,7 @@ def test_cli_arg_element_one_given(mocker):
     worker = ElementsWorker()
     worker.configure()
-    assert worker.args.element == [UUID("12341234-1234-1234-1234-123412341234")]
+    assert worker.args.element == ["12341234-1234-1234-1234-123412341234"]
     # elements_list is None because TASK_ELEMENTS environment variable isn't set
     assert not worker.args.elements_list
@@ -94,8 +86,8 @@ def test_cli_arg_element_many_given(mocker):
     worker.configure()
     assert worker.args.element == [
-        UUID("12341234-1234-1234-1234-123412341234"),
-        UUID("43214321-4321-4321-4321-432143214321"),
+        "12341234-1234-1234-1234-123412341234",
+        "43214321-4321-4321-4321-432143214321",
     ]
     # elements_list is None because TASK_ELEMENTS environment variable isn't set
     assert not worker.args.elements_list

tests/test_elements_worker/test_corpus.py ADDED Viewed

@@ -0,0 +1,137 @@
+import re
+import uuid
+import pytest
+from apistar.exceptions import ErrorResponse
+from arkindex_worker.worker.corpus import CorpusExportState
+from tests import CORPUS_ID
+from tests.test_elements_worker import BASE_API_CALLS
+def mock_list_exports_call(responses, export_id):
+    responses.add(
+        responses.GET,
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/",
+        status=200,
+        json={
+            "count": len(CorpusExportState),
+            "next": None,
+            "results": [
+                {
+                    "id": str(uuid.uuid4())
+                    if state != CorpusExportState.Done
+                    else export_id,
+                    "created": "2019-08-24T14:15:22Z",
+                    "updated": "2019-08-24T14:15:22Z",
+                    "corpus_id": CORPUS_ID,
+                    "user": {
+                        "id": 0,
+                        "email": "user@example.com",
+                        "display_name": "User",
+                    },
+                    "state": state.value,
+                    "source": "default",
+                }
+                for state in CorpusExportState
+            ],
+        },
+    )
+def test_download_latest_export_list_error(responses, mock_elements_worker):
+    responses.add(
+        responses.GET,
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/",
+        status=418,
+    )
+    with pytest.raises(
+        Exception, match="Stopping pagination as data will be incomplete"
+    ):
+        mock_elements_worker.download_latest_export()
+    assert len(responses.calls) == len(BASE_API_CALLS) + 5
+    assert [
+        (call.request.method, call.request.url) for call in responses.calls
+    ] == BASE_API_CALLS + [
+        # The API call is retried 5 times
+        ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
+        ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
+        ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
+        ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
+        ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
+    ]
+def test_download_latest_export_no_available_exports(responses, mock_elements_worker):
+    responses.add(
+        responses.GET,
+        f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/",
+        status=200,
+        json={
+            "count": 0,
+            "next": None,
+            "results": [],
+        },
+    )
+    with pytest.raises(
+        AssertionError,
+        match=re.escape(
+            f'No available exports found for the corpus ({CORPUS_ID}) with state "Done".'
+        ),
+    ):
+        mock_elements_worker.download_latest_export()
+    assert len(responses.calls) == len(BASE_API_CALLS) + 1
+    assert [
+        (call.request.method, call.request.url) for call in responses.calls
+    ] == BASE_API_CALLS + [
+        ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
+    ]
+def test_download_latest_export_download_error(responses, mock_elements_worker):
+    export_id = str(uuid.uuid4())
+    mock_list_exports_call(responses, export_id)
+    responses.add(
+        responses.GET,
+        f"http://testserver/api/v1/export/{export_id}/",
+        status=418,
+    )
+    with pytest.raises(ErrorResponse):
+        mock_elements_worker.download_latest_export()
+    assert len(responses.calls) == len(BASE_API_CALLS) + 2
+    assert [
+        (call.request.method, call.request.url) for call in responses.calls
+    ] == BASE_API_CALLS + [
+        ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
+        ("GET", f"http://testserver/api/v1/export/{export_id}/"),
+    ]
+def test_download_latest_export(responses, mock_elements_worker):
+    export_id = str(uuid.uuid4())
+    mock_list_exports_call(responses, export_id)
+    responses.add(
+        responses.GET,
+        f"http://testserver/api/v1/export/{export_id}/",
+        status=302,
+        body=b"some SQLite export",
+        content_type="application/x-sqlite3",
+        stream=True,
+    )
+    export = mock_elements_worker.download_latest_export()
+    assert export.name == f"/tmp/{export_id}"
+    assert len(responses.calls) == len(BASE_API_CALLS) + 2
+    assert [
+        (call.request.method, call.request.url) for call in responses.calls
+    ] == BASE_API_CALLS + [
+        ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
+        ("GET", f"http://testserver/api/v1/export/{export_id}/"),
+    ]

tests/test_elements_worker/test_dataset.py CHANGED Viewed

@@ -6,7 +6,7 @@ from apistar.exceptions import ErrorResponse
 from arkindex_worker.models import Dataset, Element, Set
 from arkindex_worker.worker.dataset import DatasetState
-from tests.conftest import PROCESS_ID
+from tests import PROCESS_ID
 from tests.test_elements_worker import BASE_API_CALLS
@@ -25,7 +25,7 @@ def test_list_process_sets_api_error(responses, mock_dataset_worker):
     responses.add(
         responses.GET,
         f"http://testserver/api/v1/process/{PROCESS_ID}/sets/",
-        status=500,
+        status=418,
     )
     with pytest.raises(
@@ -152,7 +152,7 @@ def test_list_set_elements_api_error(
     responses.add(
         responses.GET,
         f"http://testserver/api/v1/datasets/{default_dataset.id}/elements/{query_params}",
-        status=500,
+        status=418,
     )
     with pytest.raises(
@@ -321,7 +321,7 @@ def test_update_dataset_state_api_error(
     responses.add(
         responses.PATCH,
         f"http://testserver/api/v1/datasets/{default_dataset.id}/",
-        status=500,
+        status=418,
     )
     with pytest.raises(ErrorResponse):
@@ -330,16 +330,11 @@ def test_update_dataset_state_api_error(
             state=DatasetState.Building,
         )
-    assert len(responses.calls) == len(BASE_API_CALLS) + 5
+    assert len(responses.calls) == len(BASE_API_CALLS) + 1
     assert [
         (call.request.method, call.request.url) for call in responses.calls
     ] == BASE_API_CALLS + [
-        # We retry 5 times the API call
-        ("PATCH", f"http://testserver/api/v1/datasets/{default_dataset.id}/"),
-        ("PATCH", f"http://testserver/api/v1/datasets/{default_dataset.id}/"),
-        ("PATCH", f"http://testserver/api/v1/datasets/{default_dataset.id}/"),
-        ("PATCH", f"http://testserver/api/v1/datasets/{default_dataset.id}/"),
-        ("PATCH", f"http://testserver/api/v1/datasets/{default_dataset.id}/"),
+        ("PATCH", f"http://testserver/api/v1/datasets/{default_dataset.id}/")
     ]

arkindex-base-worker 0.3.7rc10__py3-none-any.whl → 0.4.0a1__py3-none-any.whl

arkindex-base-worker 0.3.7rc10__py3-none-any.whl → 0.4.0a1__py3-none-any.whl