arkindex-base-worker 0.3.7rc10__py3-none-any.whl → 0.4.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {arkindex_base_worker-0.3.7rc10.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/METADATA +10 -16
  2. arkindex_base_worker-0.4.0a1.dist-info/RECORD +51 -0
  3. arkindex_worker/models.py +6 -0
  4. arkindex_worker/utils.py +3 -4
  5. arkindex_worker/worker/__init__.py +23 -2
  6. arkindex_worker/worker/base.py +3 -23
  7. arkindex_worker/worker/corpus.py +69 -0
  8. arkindex_worker/worker/image.py +21 -0
  9. arkindex_worker/worker/training.py +12 -0
  10. tests/__init__.py +8 -0
  11. tests/conftest.py +4 -8
  12. tests/test_base_worker.py +8 -9
  13. tests/test_dataset_worker.py +14 -41
  14. tests/test_elements_worker/test_classifications.py +22 -39
  15. tests/test_elements_worker/test_cli.py +3 -11
  16. tests/test_elements_worker/test_corpus.py +137 -0
  17. tests/test_elements_worker/test_dataset.py +6 -11
  18. tests/test_elements_worker/test_elements.py +106 -85
  19. tests/test_elements_worker/test_entities.py +15 -39
  20. tests/test_elements_worker/test_image.py +65 -0
  21. tests/test_elements_worker/test_metadata.py +6 -40
  22. tests/test_elements_worker/test_task.py +7 -17
  23. tests/test_elements_worker/test_training.py +35 -0
  24. tests/test_elements_worker/test_transcriptions.py +10 -27
  25. tests/test_elements_worker/test_worker.py +2 -1
  26. tests/test_image.py +3 -5
  27. arkindex_base_worker-0.3.7rc10.dist-info/RECORD +0 -47
  28. {arkindex_base_worker-0.3.7rc10.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/LICENSE +0 -0
  29. {arkindex_base_worker-0.3.7rc10.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/WHEEL +0 -0
  30. {arkindex_base_worker-0.3.7rc10.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,7 @@ from apistar.exceptions import ErrorResponse
7
7
 
8
8
  from arkindex_worker.cache import CachedClassification, CachedElement
9
9
  from arkindex_worker.models import Element
10
+ from tests import CORPUS_ID
10
11
 
11
12
  from . import BASE_API_CALLS
12
13
 
@@ -16,10 +17,9 @@ DELETE_PARAMETER = "DELETE_PARAMETER"
16
17
 
17
18
 
18
19
  def test_get_ml_class_id_load_classes(responses, mock_elements_worker):
19
- corpus_id = "11111111-1111-1111-1111-111111111111"
20
20
  responses.add(
21
21
  responses.GET,
22
- f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
22
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
23
23
  status=200,
24
24
  json={
25
25
  "count": 1,
@@ -42,7 +42,7 @@ def test_get_ml_class_id_load_classes(responses, mock_elements_worker):
42
42
  ] == BASE_API_CALLS + [
43
43
  (
44
44
  "GET",
45
- f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
45
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
46
46
  ),
47
47
  ]
48
48
  assert mock_elements_worker.classes == {"good": "0000"}
@@ -51,12 +51,11 @@ def test_get_ml_class_id_load_classes(responses, mock_elements_worker):
51
51
 
52
52
  def test_get_ml_class_id_inexistant_class(mock_elements_worker, responses):
53
53
  # A missing class is now created automatically
54
- corpus_id = "11111111-1111-1111-1111-111111111111"
55
54
  mock_elements_worker.classes = {"good": "0000"}
56
55
 
57
56
  responses.add(
58
57
  responses.POST,
59
- f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
58
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
60
59
  status=201,
61
60
  json={"id": "new-ml-class-1234"},
62
61
  )
@@ -82,12 +81,10 @@ def test_get_ml_class_id(mock_elements_worker):
82
81
 
83
82
 
84
83
  def test_get_ml_class_reload(responses, mock_elements_worker):
85
- corpus_id = "11111111-1111-1111-1111-111111111111"
86
-
87
84
  # Add some initial classes
88
85
  responses.add(
89
86
  responses.GET,
90
- f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
87
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
91
88
  json={
92
89
  "count": 1,
93
90
  "next": None,
@@ -103,7 +100,7 @@ def test_get_ml_class_reload(responses, mock_elements_worker):
103
100
  # Invalid response when trying to create class2
104
101
  responses.add(
105
102
  responses.POST,
106
- f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
103
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
107
104
  status=400,
108
105
  json={"non_field_errors": "Already exists"},
109
106
  )
@@ -111,7 +108,7 @@ def test_get_ml_class_reload(responses, mock_elements_worker):
111
108
  # Add both classes (class2 is created by another process)
112
109
  responses.add(
113
110
  responses.GET,
114
- f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
111
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
115
112
  json={
116
113
  "count": 2,
117
114
  "next": None,
@@ -141,15 +138,15 @@ def test_get_ml_class_reload(responses, mock_elements_worker):
141
138
  ] == BASE_API_CALLS + [
142
139
  (
143
140
  "GET",
144
- f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
141
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
145
142
  ),
146
143
  (
147
144
  "POST",
148
- f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
145
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
149
146
  ),
150
147
  (
151
148
  "GET",
152
- f"http://testserver/api/v1/corpus/{corpus_id}/classes/",
149
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
153
150
  ),
154
151
  ]
155
152
 
@@ -169,7 +166,7 @@ def test_retrieve_ml_class_not_in_cache(responses, mock_elements_worker):
169
166
  """
170
167
  responses.add(
171
168
  responses.GET,
172
- f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/classes/",
169
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
173
170
  status=200,
174
171
  json={
175
172
  "count": 1,
@@ -189,7 +186,7 @@ def test_retrieve_ml_class_not_in_cache(responses, mock_elements_worker):
189
186
  ] == BASE_API_CALLS + [
190
187
  (
191
188
  "GET",
192
- f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/classes/",
189
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
193
190
  ),
194
191
  ]
195
192
 
@@ -276,7 +273,7 @@ def test_create_classification_api_error(responses, mock_elements_worker):
276
273
  responses.add(
277
274
  responses.POST,
278
275
  "http://testserver/api/v1/classifications/",
279
- status=500,
276
+ status=418,
280
277
  )
281
278
 
282
279
  with pytest.raises(ErrorResponse):
@@ -287,17 +284,10 @@ def test_create_classification_api_error(responses, mock_elements_worker):
287
284
  high_confidence=True,
288
285
  )
289
286
 
290
- assert len(responses.calls) == len(BASE_API_CALLS) + 5
287
+ assert len(responses.calls) == len(BASE_API_CALLS) + 1
291
288
  assert [
292
289
  (call.request.method, call.request.url) for call in responses.calls
293
- ] == BASE_API_CALLS + [
294
- # We retry 5 times the API call
295
- ("POST", "http://testserver/api/v1/classifications/"),
296
- ("POST", "http://testserver/api/v1/classifications/"),
297
- ("POST", "http://testserver/api/v1/classifications/"),
298
- ("POST", "http://testserver/api/v1/classifications/"),
299
- ("POST", "http://testserver/api/v1/classifications/"),
300
- ]
290
+ ] == BASE_API_CALLS + [("POST", "http://testserver/api/v1/classifications/")]
301
291
 
302
292
 
303
293
  def test_create_classification_create_ml_class(mock_elements_worker, responses):
@@ -306,7 +296,7 @@ def test_create_classification_create_ml_class(mock_elements_worker, responses):
306
296
  # Automatically create a missing class!
307
297
  responses.add(
308
298
  responses.POST,
309
- "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/classes/",
299
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
310
300
  status=201,
311
301
  json={"id": "new-ml-class-1234"},
312
302
  )
@@ -330,7 +320,7 @@ def test_create_classification_create_ml_class(mock_elements_worker, responses):
330
320
  for call in responses.calls[-2:]
331
321
  ] == [
332
322
  (
333
- "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/classes/",
323
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
334
324
  {"name": "a_class"},
335
325
  ),
336
326
  (
@@ -609,7 +599,7 @@ def test_create_classifications_api_error(responses, mock_elements_worker):
609
599
  responses.add(
610
600
  responses.POST,
611
601
  "http://testserver/api/v1/classification/bulk/",
612
- status=500,
602
+ status=418,
613
603
  )
614
604
  elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
615
605
  classes = [
@@ -630,17 +620,10 @@ def test_create_classifications_api_error(responses, mock_elements_worker):
630
620
  element=elt, classifications=classes
631
621
  )
632
622
 
633
- assert len(responses.calls) == len(BASE_API_CALLS) + 5
623
+ assert len(responses.calls) == len(BASE_API_CALLS) + 1
634
624
  assert [
635
625
  (call.request.method, call.request.url) for call in responses.calls
636
- ] == BASE_API_CALLS + [
637
- # We retry 5 times the API call
638
- ("POST", "http://testserver/api/v1/classification/bulk/"),
639
- ("POST", "http://testserver/api/v1/classification/bulk/"),
640
- ("POST", "http://testserver/api/v1/classification/bulk/"),
641
- ("POST", "http://testserver/api/v1/classification/bulk/"),
642
- ("POST", "http://testserver/api/v1/classification/bulk/"),
643
- ]
626
+ ] == BASE_API_CALLS + [("POST", "http://testserver/api/v1/classification/bulk/")]
644
627
 
645
628
 
646
629
  def test_create_classifications_create_ml_class(mock_elements_worker, responses):
@@ -649,7 +632,7 @@ def test_create_classifications_create_ml_class(mock_elements_worker, responses)
649
632
  # Automatically create a missing class!
650
633
  responses.add(
651
634
  responses.POST,
652
- "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/classes/",
635
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
653
636
  status=201,
654
637
  json={"id": "new-ml-class-1234"},
655
638
  )
@@ -690,7 +673,7 @@ def test_create_classifications_create_ml_class(mock_elements_worker, responses)
690
673
  ] == BASE_API_CALLS + [
691
674
  (
692
675
  "POST",
693
- "http://testserver/api/v1/corpus/11111111-1111-1111-1111-111111111111/classes/",
676
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
694
677
  ),
695
678
  ("POST", "http://testserver/api/v1/classification/bulk/"),
696
679
  ]
@@ -2,7 +2,6 @@ import json
2
2
  import sys
3
3
  import tempfile
4
4
  from pathlib import Path
5
- from uuid import UUID
6
5
 
7
6
  import pytest
8
7
 
@@ -58,13 +57,6 @@ def test_cli_arg_elements_list_given(mocker):
58
57
  path.unlink()
59
58
 
60
59
 
61
- def test_cli_arg_element_one_given_not_uuid(mocker):
62
- mocker.patch.object(sys, "argv", ["worker", "--element", "1234"])
63
- worker = ElementsWorker()
64
- with pytest.raises(SystemExit):
65
- worker.configure()
66
-
67
-
68
60
  @pytest.mark.usefixtures("_mock_worker_run_api")
69
61
  def test_cli_arg_element_one_given(mocker):
70
62
  mocker.patch.object(
@@ -73,7 +65,7 @@ def test_cli_arg_element_one_given(mocker):
73
65
  worker = ElementsWorker()
74
66
  worker.configure()
75
67
 
76
- assert worker.args.element == [UUID("12341234-1234-1234-1234-123412341234")]
68
+ assert worker.args.element == ["12341234-1234-1234-1234-123412341234"]
77
69
  # elements_list is None because TASK_ELEMENTS environment variable isn't set
78
70
  assert not worker.args.elements_list
79
71
 
@@ -94,8 +86,8 @@ def test_cli_arg_element_many_given(mocker):
94
86
  worker.configure()
95
87
 
96
88
  assert worker.args.element == [
97
- UUID("12341234-1234-1234-1234-123412341234"),
98
- UUID("43214321-4321-4321-4321-432143214321"),
89
+ "12341234-1234-1234-1234-123412341234",
90
+ "43214321-4321-4321-4321-432143214321",
99
91
  ]
100
92
  # elements_list is None because TASK_ELEMENTS environment variable isn't set
101
93
  assert not worker.args.elements_list
@@ -0,0 +1,137 @@
1
+ import re
2
+ import uuid
3
+
4
+ import pytest
5
+ from apistar.exceptions import ErrorResponse
6
+
7
+ from arkindex_worker.worker.corpus import CorpusExportState
8
+ from tests import CORPUS_ID
9
+ from tests.test_elements_worker import BASE_API_CALLS
10
+
11
+
12
+ def mock_list_exports_call(responses, export_id):
13
+ responses.add(
14
+ responses.GET,
15
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/",
16
+ status=200,
17
+ json={
18
+ "count": len(CorpusExportState),
19
+ "next": None,
20
+ "results": [
21
+ {
22
+ "id": str(uuid.uuid4())
23
+ if state != CorpusExportState.Done
24
+ else export_id,
25
+ "created": "2019-08-24T14:15:22Z",
26
+ "updated": "2019-08-24T14:15:22Z",
27
+ "corpus_id": CORPUS_ID,
28
+ "user": {
29
+ "id": 0,
30
+ "email": "user@example.com",
31
+ "display_name": "User",
32
+ },
33
+ "state": state.value,
34
+ "source": "default",
35
+ }
36
+ for state in CorpusExportState
37
+ ],
38
+ },
39
+ )
40
+
41
+
42
+ def test_download_latest_export_list_error(responses, mock_elements_worker):
43
+ responses.add(
44
+ responses.GET,
45
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/",
46
+ status=418,
47
+ )
48
+
49
+ with pytest.raises(
50
+ Exception, match="Stopping pagination as data will be incomplete"
51
+ ):
52
+ mock_elements_worker.download_latest_export()
53
+
54
+ assert len(responses.calls) == len(BASE_API_CALLS) + 5
55
+ assert [
56
+ (call.request.method, call.request.url) for call in responses.calls
57
+ ] == BASE_API_CALLS + [
58
+ # The API call is retried 5 times
59
+ ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
60
+ ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
61
+ ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
62
+ ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
63
+ ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
64
+ ]
65
+
66
+
67
+ def test_download_latest_export_no_available_exports(responses, mock_elements_worker):
68
+ responses.add(
69
+ responses.GET,
70
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/",
71
+ status=200,
72
+ json={
73
+ "count": 0,
74
+ "next": None,
75
+ "results": [],
76
+ },
77
+ )
78
+
79
+ with pytest.raises(
80
+ AssertionError,
81
+ match=re.escape(
82
+ f'No available exports found for the corpus ({CORPUS_ID}) with state "Done".'
83
+ ),
84
+ ):
85
+ mock_elements_worker.download_latest_export()
86
+
87
+ assert len(responses.calls) == len(BASE_API_CALLS) + 1
88
+ assert [
89
+ (call.request.method, call.request.url) for call in responses.calls
90
+ ] == BASE_API_CALLS + [
91
+ ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
92
+ ]
93
+
94
+
95
+ def test_download_latest_export_download_error(responses, mock_elements_worker):
96
+ export_id = str(uuid.uuid4())
97
+ mock_list_exports_call(responses, export_id)
98
+ responses.add(
99
+ responses.GET,
100
+ f"http://testserver/api/v1/export/{export_id}/",
101
+ status=418,
102
+ )
103
+
104
+ with pytest.raises(ErrorResponse):
105
+ mock_elements_worker.download_latest_export()
106
+
107
+ assert len(responses.calls) == len(BASE_API_CALLS) + 2
108
+ assert [
109
+ (call.request.method, call.request.url) for call in responses.calls
110
+ ] == BASE_API_CALLS + [
111
+ ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
112
+ ("GET", f"http://testserver/api/v1/export/{export_id}/"),
113
+ ]
114
+
115
+
116
+ def test_download_latest_export(responses, mock_elements_worker):
117
+ export_id = str(uuid.uuid4())
118
+ mock_list_exports_call(responses, export_id)
119
+ responses.add(
120
+ responses.GET,
121
+ f"http://testserver/api/v1/export/{export_id}/",
122
+ status=302,
123
+ body=b"some SQLite export",
124
+ content_type="application/x-sqlite3",
125
+ stream=True,
126
+ )
127
+
128
+ export = mock_elements_worker.download_latest_export()
129
+ assert export.name == f"/tmp/{export_id}"
130
+
131
+ assert len(responses.calls) == len(BASE_API_CALLS) + 2
132
+ assert [
133
+ (call.request.method, call.request.url) for call in responses.calls
134
+ ] == BASE_API_CALLS + [
135
+ ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
136
+ ("GET", f"http://testserver/api/v1/export/{export_id}/"),
137
+ ]
@@ -6,7 +6,7 @@ from apistar.exceptions import ErrorResponse
6
6
 
7
7
  from arkindex_worker.models import Dataset, Element, Set
8
8
  from arkindex_worker.worker.dataset import DatasetState
9
- from tests.conftest import PROCESS_ID
9
+ from tests import PROCESS_ID
10
10
  from tests.test_elements_worker import BASE_API_CALLS
11
11
 
12
12
 
@@ -25,7 +25,7 @@ def test_list_process_sets_api_error(responses, mock_dataset_worker):
25
25
  responses.add(
26
26
  responses.GET,
27
27
  f"http://testserver/api/v1/process/{PROCESS_ID}/sets/",
28
- status=500,
28
+ status=418,
29
29
  )
30
30
 
31
31
  with pytest.raises(
@@ -152,7 +152,7 @@ def test_list_set_elements_api_error(
152
152
  responses.add(
153
153
  responses.GET,
154
154
  f"http://testserver/api/v1/datasets/{default_dataset.id}/elements/{query_params}",
155
- status=500,
155
+ status=418,
156
156
  )
157
157
 
158
158
  with pytest.raises(
@@ -321,7 +321,7 @@ def test_update_dataset_state_api_error(
321
321
  responses.add(
322
322
  responses.PATCH,
323
323
  f"http://testserver/api/v1/datasets/{default_dataset.id}/",
324
- status=500,
324
+ status=418,
325
325
  )
326
326
 
327
327
  with pytest.raises(ErrorResponse):
@@ -330,16 +330,11 @@ def test_update_dataset_state_api_error(
330
330
  state=DatasetState.Building,
331
331
  )
332
332
 
333
- assert len(responses.calls) == len(BASE_API_CALLS) + 5
333
+ assert len(responses.calls) == len(BASE_API_CALLS) + 1
334
334
  assert [
335
335
  (call.request.method, call.request.url) for call in responses.calls
336
336
  ] == BASE_API_CALLS + [
337
- # We retry 5 times the API call
338
- ("PATCH", f"http://testserver/api/v1/datasets/{default_dataset.id}/"),
339
- ("PATCH", f"http://testserver/api/v1/datasets/{default_dataset.id}/"),
340
- ("PATCH", f"http://testserver/api/v1/datasets/{default_dataset.id}/"),
341
- ("PATCH", f"http://testserver/api/v1/datasets/{default_dataset.id}/"),
342
- ("PATCH", f"http://testserver/api/v1/datasets/{default_dataset.id}/"),
337
+ ("PATCH", f"http://testserver/api/v1/datasets/{default_dataset.id}/")
343
338
  ]
344
339
 
345
340