arkindex-base-worker 0.3.7rc9__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.3.7rc9.dist-info → arkindex_base_worker-0.4.0.dist-info}/METADATA +16 -20
- arkindex_base_worker-0.4.0.dist-info/RECORD +61 -0
- {arkindex_base_worker-0.3.7rc9.dist-info → arkindex_base_worker-0.4.0.dist-info}/WHEEL +1 -1
- arkindex_worker/cache.py +1 -1
- arkindex_worker/image.py +120 -1
- arkindex_worker/models.py +6 -0
- arkindex_worker/utils.py +85 -4
- arkindex_worker/worker/__init__.py +68 -162
- arkindex_worker/worker/base.py +39 -34
- arkindex_worker/worker/classification.py +34 -18
- arkindex_worker/worker/corpus.py +86 -0
- arkindex_worker/worker/dataset.py +71 -1
- arkindex_worker/worker/element.py +352 -91
- arkindex_worker/worker/entity.py +11 -11
- arkindex_worker/worker/image.py +21 -0
- arkindex_worker/worker/metadata.py +19 -9
- arkindex_worker/worker/process.py +92 -0
- arkindex_worker/worker/task.py +5 -4
- arkindex_worker/worker/training.py +25 -10
- arkindex_worker/worker/transcription.py +89 -68
- arkindex_worker/worker/version.py +3 -1
- tests/__init__.py +8 -0
- tests/conftest.py +36 -52
- tests/test_base_worker.py +212 -12
- tests/test_dataset_worker.py +21 -45
- tests/test_elements_worker/{test_classifications.py → test_classification.py} +216 -100
- tests/test_elements_worker/test_cli.py +3 -11
- tests/test_elements_worker/test_corpus.py +168 -0
- tests/test_elements_worker/test_dataset.py +7 -12
- tests/test_elements_worker/test_element.py +427 -0
- tests/test_elements_worker/test_element_create_multiple.py +715 -0
- tests/test_elements_worker/test_element_create_single.py +528 -0
- tests/test_elements_worker/test_element_list_children.py +969 -0
- tests/test_elements_worker/test_element_list_parents.py +530 -0
- tests/test_elements_worker/{test_entities.py → test_entity_create.py} +37 -195
- tests/test_elements_worker/test_entity_list_and_check.py +160 -0
- tests/test_elements_worker/test_image.py +66 -0
- tests/test_elements_worker/test_metadata.py +230 -139
- tests/test_elements_worker/test_process.py +89 -0
- tests/test_elements_worker/test_task.py +8 -18
- tests/test_elements_worker/test_training.py +17 -8
- tests/test_elements_worker/test_transcription_create.py +873 -0
- tests/test_elements_worker/test_transcription_create_with_elements.py +951 -0
- tests/test_elements_worker/test_transcription_list.py +450 -0
- tests/test_elements_worker/test_version.py +60 -0
- tests/test_elements_worker/test_worker.py +563 -279
- tests/test_image.py +432 -209
- tests/test_merge.py +1 -2
- tests/test_utils.py +66 -3
- arkindex_base_worker-0.3.7rc9.dist-info/RECORD +0 -47
- tests/test_elements_worker/test_elements.py +0 -2713
- tests/test_elements_worker/test_transcriptions.py +0 -2119
- {arkindex_base_worker-0.3.7rc9.dist-info → arkindex_base_worker-0.4.0.dist-info}/LICENSE +0 -0
- {arkindex_base_worker-0.3.7rc9.dist-info → arkindex_base_worker-0.4.0.dist-info}/top_level.txt +0 -0
|
@@ -1,89 +1,572 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import sys
|
|
3
|
+
from argparse import Namespace
|
|
4
|
+
from uuid import UUID
|
|
3
5
|
|
|
4
6
|
import pytest
|
|
5
|
-
from apistar.exceptions import ErrorResponse
|
|
6
7
|
|
|
7
|
-
from
|
|
8
|
+
from arkindex.exceptions import ErrorResponse
|
|
9
|
+
from arkindex_worker.cache import (
|
|
10
|
+
SQL_VERSION,
|
|
11
|
+
CachedElement,
|
|
12
|
+
create_version_table,
|
|
13
|
+
init_cache_db,
|
|
14
|
+
)
|
|
15
|
+
from arkindex_worker.models import Element
|
|
8
16
|
from arkindex_worker.worker import ActivityState, ElementsWorker
|
|
17
|
+
from arkindex_worker.worker.dataset import DatasetState
|
|
18
|
+
from arkindex_worker.worker.process import ProcessMode
|
|
19
|
+
from tests import PROCESS_ID
|
|
9
20
|
|
|
10
21
|
from . import BASE_API_CALLS
|
|
11
22
|
|
|
12
|
-
TEST_VERSION_ID = "test_123"
|
|
13
|
-
TEST_SLUG = "some_slug"
|
|
14
23
|
|
|
24
|
+
def test_database_arg(mocker, mock_elements_worker, tmp_path):
|
|
25
|
+
database_path = tmp_path / "my_database.sqlite"
|
|
26
|
+
init_cache_db(database_path)
|
|
27
|
+
create_version_table()
|
|
28
|
+
|
|
29
|
+
mocker.patch(
|
|
30
|
+
"arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
|
|
31
|
+
return_value=Namespace(
|
|
32
|
+
element=["volumeid", "pageid"],
|
|
33
|
+
verbose=False,
|
|
34
|
+
elements_list=None,
|
|
35
|
+
database=database_path,
|
|
36
|
+
dev=False,
|
|
37
|
+
set=[],
|
|
38
|
+
),
|
|
39
|
+
)
|
|
15
40
|
|
|
16
|
-
|
|
17
|
-
|
|
41
|
+
worker = ElementsWorker(support_cache=True)
|
|
42
|
+
worker.configure()
|
|
18
43
|
|
|
19
|
-
|
|
44
|
+
assert worker.use_cache is True
|
|
45
|
+
assert worker.cache_path == database_path
|
|
20
46
|
|
|
21
|
-
api_client.add_response("RetrieveWorkerVersion", response, id=TEST_VERSION_ID)
|
|
22
47
|
|
|
23
|
-
|
|
24
|
-
|
|
48
|
+
def test_database_arg_cache_missing_version_table(
|
|
49
|
+
mocker, mock_elements_worker, tmp_path
|
|
50
|
+
):
|
|
51
|
+
database_path = tmp_path / "my_database.sqlite"
|
|
52
|
+
database_path.touch()
|
|
53
|
+
|
|
54
|
+
mocker.patch(
|
|
55
|
+
"arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
|
|
56
|
+
return_value=Namespace(
|
|
57
|
+
element=["volumeid", "pageid"],
|
|
58
|
+
verbose=False,
|
|
59
|
+
elements_list=None,
|
|
60
|
+
database=database_path,
|
|
61
|
+
dev=False,
|
|
62
|
+
set=[],
|
|
63
|
+
),
|
|
64
|
+
)
|
|
25
65
|
|
|
26
|
-
|
|
27
|
-
|
|
66
|
+
worker = ElementsWorker(support_cache=True)
|
|
67
|
+
with pytest.raises(
|
|
68
|
+
AssertionError,
|
|
69
|
+
match=f"The SQLite database {database_path} does not have the correct cache version, it should be {SQL_VERSION}",
|
|
70
|
+
):
|
|
71
|
+
worker.configure()
|
|
28
72
|
|
|
29
73
|
|
|
30
|
-
def
|
|
31
|
-
|
|
74
|
+
def test_readonly(responses, mock_elements_worker):
|
|
75
|
+
"""Test readonly worker does not trigger any API calls"""
|
|
32
76
|
|
|
33
|
-
|
|
77
|
+
# Setup the worker as read-only
|
|
78
|
+
mock_elements_worker.worker_run_id = None
|
|
79
|
+
assert mock_elements_worker.is_read_only is True
|
|
34
80
|
|
|
35
|
-
|
|
81
|
+
out = mock_elements_worker.update_activity("1234-deadbeef", ActivityState.Processed)
|
|
36
82
|
|
|
37
|
-
|
|
38
|
-
|
|
83
|
+
# update_activity returns False in very specific cases
|
|
84
|
+
assert out is True
|
|
85
|
+
assert len(responses.calls) == len(BASE_API_CALLS)
|
|
86
|
+
assert [
|
|
87
|
+
(call.request.method, call.request.url) for call in responses.calls
|
|
88
|
+
] == BASE_API_CALLS
|
|
39
89
|
|
|
40
|
-
with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
|
|
41
|
-
response_2 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
|
|
42
90
|
|
|
43
|
-
|
|
44
|
-
|
|
91
|
+
def test_get_elements_elements_list_arg_wrong_type(
|
|
92
|
+
monkeypatch, tmp_path, mock_elements_worker
|
|
93
|
+
):
|
|
94
|
+
elements_path = tmp_path / "elements.json"
|
|
95
|
+
elements_path.write_text("{}")
|
|
45
96
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
97
|
+
monkeypatch.setenv("TASK_ELEMENTS", str(elements_path))
|
|
98
|
+
worker = ElementsWorker()
|
|
99
|
+
worker.configure()
|
|
49
100
|
|
|
101
|
+
with pytest.raises(AssertionError, match="Elements list must be a list"):
|
|
102
|
+
worker.get_elements()
|
|
50
103
|
|
|
51
|
-
def test_get_worker_version_slug(mocker, fake_dummy_worker):
|
|
52
|
-
fake_dummy_worker.get_worker_version = mocker.MagicMock()
|
|
53
|
-
fake_dummy_worker.get_worker_version.return_value = {
|
|
54
|
-
"id": TEST_VERSION_ID,
|
|
55
|
-
"worker": {"slug": "mock_slug"},
|
|
56
|
-
}
|
|
57
104
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
105
|
+
def test_get_elements_elements_list_arg_empty_list(
|
|
106
|
+
monkeypatch, tmp_path, mock_elements_worker
|
|
107
|
+
):
|
|
108
|
+
elements_path = tmp_path / "elements.json"
|
|
109
|
+
elements_path.write_text("[]")
|
|
110
|
+
|
|
111
|
+
monkeypatch.setenv("TASK_ELEMENTS", str(elements_path))
|
|
112
|
+
worker = ElementsWorker()
|
|
113
|
+
worker.configure()
|
|
114
|
+
|
|
115
|
+
with pytest.raises(AssertionError, match="No elements in elements list"):
|
|
116
|
+
worker.get_elements()
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def test_get_elements_elements_list_arg_missing_id(
|
|
120
|
+
monkeypatch, tmp_path, mock_elements_worker
|
|
121
|
+
):
|
|
122
|
+
elements_path = tmp_path / "elements.json"
|
|
123
|
+
elements_path.write_text(json.dumps([{"type": "volume"}]))
|
|
124
|
+
|
|
125
|
+
monkeypatch.setenv("TASK_ELEMENTS", str(elements_path))
|
|
126
|
+
worker = ElementsWorker()
|
|
127
|
+
worker.configure()
|
|
128
|
+
|
|
129
|
+
elt_list = worker.get_elements()
|
|
130
|
+
|
|
131
|
+
assert elt_list == []
|
|
132
|
+
|
|
61
133
|
|
|
134
|
+
def test_get_elements_elements_list_arg_not_uuid(
|
|
135
|
+
monkeypatch, tmp_path, mock_elements_worker
|
|
136
|
+
):
|
|
137
|
+
elements_path = tmp_path / "elements.json"
|
|
138
|
+
elements_path.write_text(
|
|
139
|
+
json.dumps(
|
|
140
|
+
[
|
|
141
|
+
{"id": "volumeid", "type": "volume"},
|
|
142
|
+
{"id": "pageid", "type": "page"},
|
|
143
|
+
{"id": "actid", "type": "act"},
|
|
144
|
+
{"id": "surfaceid", "type": "surface"},
|
|
145
|
+
]
|
|
146
|
+
)
|
|
147
|
+
)
|
|
62
148
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
149
|
+
monkeypatch.setenv("TASK_ELEMENTS", str(elements_path))
|
|
150
|
+
worker = ElementsWorker()
|
|
151
|
+
worker.configure()
|
|
152
|
+
|
|
153
|
+
with pytest.raises(
|
|
154
|
+
Exception,
|
|
155
|
+
match="These element IDs are invalid: volumeid, pageid, actid, surfaceid",
|
|
68
156
|
):
|
|
69
|
-
|
|
157
|
+
worker.get_elements()
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def test_get_elements_elements_list_arg(monkeypatch, tmp_path, mock_elements_worker):
|
|
161
|
+
elements_path = tmp_path / "elements.json"
|
|
162
|
+
elements_path.write_text(
|
|
163
|
+
json.dumps(
|
|
164
|
+
[
|
|
165
|
+
{"id": "11111111-1111-1111-1111-111111111111", "type": "volume"},
|
|
166
|
+
{"id": "22222222-2222-2222-2222-222222222222", "type": "page"},
|
|
167
|
+
{"id": "33333333-3333-3333-3333-333333333333", "type": "act"},
|
|
168
|
+
]
|
|
169
|
+
)
|
|
170
|
+
)
|
|
70
171
|
|
|
172
|
+
monkeypatch.setenv("TASK_ELEMENTS", str(elements_path))
|
|
173
|
+
worker = ElementsWorker()
|
|
174
|
+
worker.configure()
|
|
71
175
|
|
|
72
|
-
|
|
73
|
-
"""Test readonly worker does not trigger any API calls"""
|
|
176
|
+
elt_list = worker.get_elements()
|
|
74
177
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
178
|
+
assert elt_list == [
|
|
179
|
+
"11111111-1111-1111-1111-111111111111",
|
|
180
|
+
"22222222-2222-2222-2222-222222222222",
|
|
181
|
+
"33333333-3333-3333-3333-333333333333",
|
|
182
|
+
]
|
|
78
183
|
|
|
79
|
-
out = mock_elements_worker.update_activity("1234-deadbeef", ActivityState.Processed)
|
|
80
184
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
185
|
+
def test_get_elements_element_arg_not_uuid(mocker, mock_elements_worker):
|
|
186
|
+
mocker.patch(
|
|
187
|
+
"arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
|
|
188
|
+
return_value=Namespace(
|
|
189
|
+
element=["volumeid", "pageid"],
|
|
190
|
+
config={},
|
|
191
|
+
verbose=False,
|
|
192
|
+
elements_list=None,
|
|
193
|
+
database=None,
|
|
194
|
+
dev=True,
|
|
195
|
+
set=[],
|
|
196
|
+
),
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
worker = ElementsWorker()
|
|
200
|
+
worker.configure()
|
|
201
|
+
|
|
202
|
+
with pytest.raises(
|
|
203
|
+
Exception, match="These element IDs are invalid: volumeid, pageid"
|
|
204
|
+
):
|
|
205
|
+
worker.get_elements()
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def test_get_elements_element_arg(mocker, mock_elements_worker):
|
|
209
|
+
mocker.patch(
|
|
210
|
+
"arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
|
|
211
|
+
return_value=Namespace(
|
|
212
|
+
element=[
|
|
213
|
+
"11111111-1111-1111-1111-111111111111",
|
|
214
|
+
"22222222-2222-2222-2222-222222222222",
|
|
215
|
+
],
|
|
216
|
+
config={},
|
|
217
|
+
verbose=False,
|
|
218
|
+
elements_list=None,
|
|
219
|
+
database=None,
|
|
220
|
+
dev=True,
|
|
221
|
+
set=[],
|
|
222
|
+
),
|
|
223
|
+
)
|
|
224
|
+
|
|
225
|
+
worker = ElementsWorker()
|
|
226
|
+
worker.configure()
|
|
227
|
+
|
|
228
|
+
elt_list = worker.get_elements()
|
|
229
|
+
|
|
230
|
+
assert elt_list == [
|
|
231
|
+
"11111111-1111-1111-1111-111111111111",
|
|
232
|
+
"22222222-2222-2222-2222-222222222222",
|
|
233
|
+
]
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def test_get_elements_dataset_set_arg(responses, mocker, mock_elements_worker):
|
|
237
|
+
mocker.patch(
|
|
238
|
+
"arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
|
|
239
|
+
return_value=Namespace(
|
|
240
|
+
element=[],
|
|
241
|
+
config={},
|
|
242
|
+
verbose=False,
|
|
243
|
+
elements_list=None,
|
|
244
|
+
database=None,
|
|
245
|
+
dev=True,
|
|
246
|
+
set=[(UUID("11111111-1111-1111-1111-111111111111"), "train")],
|
|
247
|
+
),
|
|
248
|
+
)
|
|
249
|
+
|
|
250
|
+
# Mock RetrieveDataset call
|
|
251
|
+
responses.add(
|
|
252
|
+
responses.GET,
|
|
253
|
+
"http://testserver/api/v1/datasets/11111111-1111-1111-1111-111111111111/",
|
|
254
|
+
status=200,
|
|
255
|
+
json={
|
|
256
|
+
"id": "11111111-1111-1111-1111-111111111111",
|
|
257
|
+
"name": "My dataset",
|
|
258
|
+
"description": "A dataset about cats.",
|
|
259
|
+
"sets": ["train", "dev", "test"],
|
|
260
|
+
"state": DatasetState.Complete.value,
|
|
261
|
+
},
|
|
262
|
+
content_type="application/json",
|
|
263
|
+
)
|
|
264
|
+
|
|
265
|
+
# Mock ListSetElements call
|
|
266
|
+
element = {
|
|
267
|
+
"id": "22222222-2222-2222-2222-222222222222",
|
|
268
|
+
"type": "page",
|
|
269
|
+
"name": "1",
|
|
270
|
+
"corpus": {
|
|
271
|
+
"id": "11111111-1111-1111-1111-111111111111",
|
|
272
|
+
},
|
|
273
|
+
"thumbnail_url": "http://example.com",
|
|
274
|
+
"zone": {
|
|
275
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
276
|
+
"polygon": [[0, 0], [0, 0], [0, 0]],
|
|
277
|
+
"image": {
|
|
278
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
279
|
+
"path": "string",
|
|
280
|
+
"width": 0,
|
|
281
|
+
"height": 0,
|
|
282
|
+
"url": "http://example.com",
|
|
283
|
+
"s3_url": "string",
|
|
284
|
+
"status": "checked",
|
|
285
|
+
"server": {
|
|
286
|
+
"display_name": "string",
|
|
287
|
+
"url": "http://example.com",
|
|
288
|
+
"max_width": 2147483647,
|
|
289
|
+
"max_height": 2147483647,
|
|
290
|
+
},
|
|
291
|
+
},
|
|
292
|
+
"url": "http://example.com",
|
|
293
|
+
},
|
|
294
|
+
"rotation_angle": 0,
|
|
295
|
+
"mirrored": False,
|
|
296
|
+
"created": "2019-08-24T14:15:22Z",
|
|
297
|
+
"classes": [
|
|
298
|
+
{
|
|
299
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
300
|
+
"ml_class": {
|
|
301
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
302
|
+
"name": "string",
|
|
303
|
+
},
|
|
304
|
+
"state": "pending",
|
|
305
|
+
"confidence": 0,
|
|
306
|
+
"high_confidence": True,
|
|
307
|
+
"worker_run": {
|
|
308
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
309
|
+
"summary": "string",
|
|
310
|
+
},
|
|
311
|
+
}
|
|
312
|
+
],
|
|
313
|
+
"metadata": [
|
|
314
|
+
{
|
|
315
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
316
|
+
"type": "text",
|
|
317
|
+
"name": "string",
|
|
318
|
+
"value": "string",
|
|
319
|
+
"dates": [{"type": "exact", "year": 0, "month": 1, "day": 1}],
|
|
320
|
+
}
|
|
321
|
+
],
|
|
322
|
+
"transcriptions": [
|
|
323
|
+
{
|
|
324
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
325
|
+
"text": "string",
|
|
326
|
+
"confidence": 0,
|
|
327
|
+
"orientation": "horizontal-lr",
|
|
328
|
+
"worker_run": {
|
|
329
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
330
|
+
"summary": "string",
|
|
331
|
+
},
|
|
332
|
+
}
|
|
333
|
+
],
|
|
334
|
+
"has_children": True,
|
|
335
|
+
"worker_run": {
|
|
336
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
337
|
+
"summary": "string",
|
|
338
|
+
},
|
|
339
|
+
"confidence": 1,
|
|
340
|
+
}
|
|
341
|
+
responses.add(
|
|
342
|
+
responses.GET,
|
|
343
|
+
"http://testserver/api/v1/datasets/11111111-1111-1111-1111-111111111111/elements/?set=train&with_count=true",
|
|
344
|
+
status=200,
|
|
345
|
+
json={
|
|
346
|
+
"next": None,
|
|
347
|
+
"previous": None,
|
|
348
|
+
"results": [
|
|
349
|
+
{
|
|
350
|
+
"set": "train",
|
|
351
|
+
"element": element,
|
|
352
|
+
}
|
|
353
|
+
],
|
|
354
|
+
"count": 1,
|
|
355
|
+
},
|
|
356
|
+
content_type="application/json",
|
|
357
|
+
)
|
|
358
|
+
|
|
359
|
+
worker = ElementsWorker()
|
|
360
|
+
worker.configure()
|
|
361
|
+
|
|
362
|
+
elt_list = worker.get_elements()
|
|
363
|
+
|
|
364
|
+
assert elt_list == [
|
|
365
|
+
Element(**element),
|
|
366
|
+
]
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def test_get_elements_dataset_set_api(responses, mocker, mock_elements_worker):
|
|
370
|
+
# Mock ListProcessSets call
|
|
371
|
+
responses.add(
|
|
372
|
+
responses.GET,
|
|
373
|
+
"http://testserver/api/v1/process/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff/sets/",
|
|
374
|
+
status=200,
|
|
375
|
+
json={
|
|
376
|
+
"next": None,
|
|
377
|
+
"previous": None,
|
|
378
|
+
"results": [
|
|
379
|
+
{
|
|
380
|
+
"id": "33333333-3333-3333-3333-333333333333",
|
|
381
|
+
"dataset": {"id": "11111111-1111-1111-1111-111111111111"},
|
|
382
|
+
"set_name": "train",
|
|
383
|
+
}
|
|
384
|
+
],
|
|
385
|
+
"count": 1,
|
|
386
|
+
},
|
|
387
|
+
content_type="application/json",
|
|
388
|
+
)
|
|
389
|
+
|
|
390
|
+
# Mock ListSetElements call
|
|
391
|
+
element = {
|
|
392
|
+
"id": "22222222-2222-2222-2222-222222222222",
|
|
393
|
+
"type": "page",
|
|
394
|
+
"name": "1",
|
|
395
|
+
"corpus": {
|
|
396
|
+
"id": "11111111-1111-1111-1111-111111111111",
|
|
397
|
+
},
|
|
398
|
+
"thumbnail_url": "http://example.com",
|
|
399
|
+
"zone": {
|
|
400
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
401
|
+
"polygon": [[0, 0], [0, 0], [0, 0]],
|
|
402
|
+
"image": {
|
|
403
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
404
|
+
"path": "string",
|
|
405
|
+
"width": 0,
|
|
406
|
+
"height": 0,
|
|
407
|
+
"url": "http://example.com",
|
|
408
|
+
"s3_url": "string",
|
|
409
|
+
"status": "checked",
|
|
410
|
+
"server": {
|
|
411
|
+
"display_name": "string",
|
|
412
|
+
"url": "http://example.com",
|
|
413
|
+
"max_width": 2147483647,
|
|
414
|
+
"max_height": 2147483647,
|
|
415
|
+
},
|
|
416
|
+
},
|
|
417
|
+
"url": "http://example.com",
|
|
418
|
+
},
|
|
419
|
+
"rotation_angle": 0,
|
|
420
|
+
"mirrored": False,
|
|
421
|
+
"created": "2019-08-24T14:15:22Z",
|
|
422
|
+
"classes": [
|
|
423
|
+
{
|
|
424
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
425
|
+
"ml_class": {
|
|
426
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
427
|
+
"name": "string",
|
|
428
|
+
},
|
|
429
|
+
"state": "pending",
|
|
430
|
+
"confidence": 0,
|
|
431
|
+
"high_confidence": True,
|
|
432
|
+
"worker_run": {
|
|
433
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
434
|
+
"summary": "string",
|
|
435
|
+
},
|
|
436
|
+
}
|
|
437
|
+
],
|
|
438
|
+
"metadata": [
|
|
439
|
+
{
|
|
440
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
441
|
+
"type": "text",
|
|
442
|
+
"name": "string",
|
|
443
|
+
"value": "string",
|
|
444
|
+
"dates": [{"type": "exact", "year": 0, "month": 1, "day": 1}],
|
|
445
|
+
}
|
|
446
|
+
],
|
|
447
|
+
"transcriptions": [
|
|
448
|
+
{
|
|
449
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
450
|
+
"text": "string",
|
|
451
|
+
"confidence": 0,
|
|
452
|
+
"orientation": "horizontal-lr",
|
|
453
|
+
"worker_run": {
|
|
454
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
455
|
+
"summary": "string",
|
|
456
|
+
},
|
|
457
|
+
}
|
|
458
|
+
],
|
|
459
|
+
"has_children": True,
|
|
460
|
+
"worker_run": {
|
|
461
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
462
|
+
"summary": "string",
|
|
463
|
+
},
|
|
464
|
+
"confidence": 1,
|
|
465
|
+
}
|
|
466
|
+
responses.add(
|
|
467
|
+
responses.GET,
|
|
468
|
+
"http://testserver/api/v1/datasets/11111111-1111-1111-1111-111111111111/elements/?set=train&with_count=true",
|
|
469
|
+
status=200,
|
|
470
|
+
json={
|
|
471
|
+
"next": None,
|
|
472
|
+
"previous": None,
|
|
473
|
+
"results": [
|
|
474
|
+
{
|
|
475
|
+
"set": "train",
|
|
476
|
+
"element": element,
|
|
477
|
+
}
|
|
478
|
+
],
|
|
479
|
+
"count": 1,
|
|
480
|
+
},
|
|
481
|
+
content_type="application/json",
|
|
482
|
+
)
|
|
483
|
+
|
|
484
|
+
# Update ProcessMode to Dataset
|
|
485
|
+
mock_elements_worker.process_information["mode"] = ProcessMode.Dataset
|
|
486
|
+
|
|
487
|
+
elt_list = mock_elements_worker.get_elements()
|
|
488
|
+
|
|
489
|
+
assert elt_list == [
|
|
490
|
+
Element(**element),
|
|
491
|
+
]
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def test_get_elements_both_args_error(mocker, mock_elements_worker, tmp_path):
|
|
495
|
+
elements_path = tmp_path / "elements.json"
|
|
496
|
+
elements_path.write_text(
|
|
497
|
+
json.dumps(
|
|
498
|
+
[
|
|
499
|
+
{"id": "volumeid", "type": "volume"},
|
|
500
|
+
{"id": "pageid", "type": "page"},
|
|
501
|
+
{"id": "actid", "type": "act"},
|
|
502
|
+
{"id": "surfaceid", "type": "surface"},
|
|
503
|
+
]
|
|
504
|
+
)
|
|
505
|
+
)
|
|
506
|
+
mocker.patch(
|
|
507
|
+
"arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
|
|
508
|
+
return_value=Namespace(
|
|
509
|
+
element=["anotherid", "againanotherid"],
|
|
510
|
+
verbose=False,
|
|
511
|
+
elements_list=elements_path.open(),
|
|
512
|
+
database=None,
|
|
513
|
+
dev=False,
|
|
514
|
+
set=[],
|
|
515
|
+
),
|
|
516
|
+
)
|
|
517
|
+
|
|
518
|
+
worker = ElementsWorker()
|
|
519
|
+
worker.configure()
|
|
520
|
+
|
|
521
|
+
with pytest.raises(
|
|
522
|
+
AssertionError, match="elements-list and element CLI args shouldn't be both set"
|
|
523
|
+
):
|
|
524
|
+
worker.get_elements()
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def test_get_elements_export_process(mock_elements_worker, responses):
|
|
528
|
+
responses.add(
|
|
529
|
+
responses.GET,
|
|
530
|
+
f"http://testserver/api/v1/process/{PROCESS_ID}/elements/?page_size=500&with_count=true&with_image=False",
|
|
531
|
+
status=200,
|
|
532
|
+
json={
|
|
533
|
+
"count": 2,
|
|
534
|
+
"next": None,
|
|
535
|
+
"results": [
|
|
536
|
+
{
|
|
537
|
+
"id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
|
538
|
+
"type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
|
539
|
+
"name": "element 1",
|
|
540
|
+
"confidence": 1,
|
|
541
|
+
"image_id": None,
|
|
542
|
+
"image_width": None,
|
|
543
|
+
"image_height": None,
|
|
544
|
+
"image_url": None,
|
|
545
|
+
"polygon": None,
|
|
546
|
+
"rotation_angle": 0,
|
|
547
|
+
"mirrored": False,
|
|
548
|
+
},
|
|
549
|
+
{
|
|
550
|
+
"id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0",
|
|
551
|
+
"type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
|
552
|
+
"name": "element 2",
|
|
553
|
+
"confidence": 1,
|
|
554
|
+
"image_id": None,
|
|
555
|
+
"image_width": None,
|
|
556
|
+
"image_height": None,
|
|
557
|
+
"image_url": None,
|
|
558
|
+
"polygon": None,
|
|
559
|
+
"rotation_angle": 0,
|
|
560
|
+
"mirrored": False,
|
|
561
|
+
},
|
|
562
|
+
],
|
|
563
|
+
},
|
|
564
|
+
)
|
|
565
|
+
mock_elements_worker.process_information["mode"] = "export"
|
|
566
|
+
assert set(mock_elements_worker.get_elements()) == {
|
|
567
|
+
"aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
|
568
|
+
"aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0",
|
|
569
|
+
}
|
|
87
570
|
|
|
88
571
|
|
|
89
572
|
@pytest.mark.usefixtures("_mock_worker_run_api")
|
|
@@ -114,43 +597,6 @@ def test_activities_dev_mode(mocker):
|
|
|
114
597
|
assert worker.store_activity is False
|
|
115
598
|
|
|
116
599
|
|
|
117
|
-
@pytest.mark.usefixtures("_mock_worker_run_api")
|
|
118
|
-
def test_update_call(responses, mock_elements_worker):
|
|
119
|
-
"""Test an update call with feature enabled triggers an API call"""
|
|
120
|
-
responses.add(
|
|
121
|
-
responses.PUT,
|
|
122
|
-
"http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
|
|
123
|
-
status=200,
|
|
124
|
-
json={
|
|
125
|
-
"element_id": "1234-deadbeef",
|
|
126
|
-
"process_id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
127
|
-
"state": "processed",
|
|
128
|
-
},
|
|
129
|
-
)
|
|
130
|
-
|
|
131
|
-
out = mock_elements_worker.update_activity("1234-deadbeef", ActivityState.Processed)
|
|
132
|
-
|
|
133
|
-
# Check the response received by worker
|
|
134
|
-
assert out is True
|
|
135
|
-
|
|
136
|
-
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
137
|
-
assert [
|
|
138
|
-
(call.request.method, call.request.url) for call in responses.calls
|
|
139
|
-
] == BASE_API_CALLS + [
|
|
140
|
-
(
|
|
141
|
-
"PUT",
|
|
142
|
-
"http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
|
|
143
|
-
),
|
|
144
|
-
]
|
|
145
|
-
|
|
146
|
-
# Check the request sent by worker
|
|
147
|
-
assert json.loads(responses.calls[-1].request.body) == {
|
|
148
|
-
"element_id": "1234-deadbeef",
|
|
149
|
-
"process_id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
150
|
-
"state": "processed",
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
|
|
154
600
|
@pytest.mark.usefixtures("_mock_activity_calls")
|
|
155
601
|
@pytest.mark.parametrize(
|
|
156
602
|
("process_exception", "final_state"),
|
|
@@ -314,200 +760,38 @@ def test_start_activity_error(
|
|
|
314
760
|
]
|
|
315
761
|
|
|
316
762
|
|
|
317
|
-
@pytest.mark.
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
"wk_version_user_config",
|
|
321
|
-
"frontend_user_config",
|
|
322
|
-
"model_config",
|
|
323
|
-
"expected_config",
|
|
324
|
-
),
|
|
325
|
-
[
|
|
326
|
-
({}, {}, {}, {}, {}),
|
|
327
|
-
# Keep parameters from worker version configuration
|
|
328
|
-
({"parameter": 0}, {}, {}, {}, {"parameter": 0}),
|
|
329
|
-
# Keep parameters from worker version configuration + user_config defaults
|
|
330
|
-
(
|
|
331
|
-
{"parameter": 0},
|
|
332
|
-
{
|
|
333
|
-
"parameter2": {
|
|
334
|
-
"type": "int",
|
|
335
|
-
"title": "Lambda",
|
|
336
|
-
"default": 0,
|
|
337
|
-
"required": False,
|
|
338
|
-
}
|
|
339
|
-
},
|
|
340
|
-
{},
|
|
341
|
-
{},
|
|
342
|
-
{"parameter": 0, "parameter2": 0},
|
|
343
|
-
),
|
|
344
|
-
# Keep parameters from worker version configuration + user_config no defaults
|
|
345
|
-
(
|
|
346
|
-
{"parameter": 0},
|
|
347
|
-
{
|
|
348
|
-
"parameter2": {
|
|
349
|
-
"type": "int",
|
|
350
|
-
"title": "Lambda",
|
|
351
|
-
"required": False,
|
|
352
|
-
}
|
|
353
|
-
},
|
|
354
|
-
{},
|
|
355
|
-
{},
|
|
356
|
-
{"parameter": 0},
|
|
357
|
-
),
|
|
358
|
-
# Keep parameters from worker version configuration but user_config defaults overrides
|
|
359
|
-
(
|
|
360
|
-
{"parameter": 0},
|
|
361
|
-
{
|
|
362
|
-
"parameter": {
|
|
363
|
-
"type": "int",
|
|
364
|
-
"title": "Lambda",
|
|
365
|
-
"default": 1,
|
|
366
|
-
"required": False,
|
|
367
|
-
}
|
|
368
|
-
},
|
|
369
|
-
{},
|
|
370
|
-
{},
|
|
371
|
-
{"parameter": 1},
|
|
372
|
-
),
|
|
373
|
-
# Keep parameters from worker version configuration + frontend config
|
|
374
|
-
(
|
|
375
|
-
{"parameter": 0},
|
|
376
|
-
{},
|
|
377
|
-
{"parameter2": 0},
|
|
378
|
-
{},
|
|
379
|
-
{"parameter": 0, "parameter2": 0},
|
|
380
|
-
),
|
|
381
|
-
# Keep parameters from worker version configuration + frontend config overrides
|
|
382
|
-
({"parameter": 0}, {}, {"parameter": 1}, {}, {"parameter": 1}),
|
|
383
|
-
# Keep parameters from worker version configuration + model config
|
|
384
|
-
(
|
|
385
|
-
{"parameter": 0},
|
|
386
|
-
{},
|
|
387
|
-
{},
|
|
388
|
-
{"parameter2": 0},
|
|
389
|
-
{"parameter": 0, "parameter2": 0},
|
|
390
|
-
),
|
|
391
|
-
# Keep parameters from worker version configuration + model config overrides
|
|
392
|
-
({"parameter": 0}, {}, {}, {"parameter": 1}, {"parameter": 1}),
|
|
393
|
-
# Keep parameters from worker version configuration + user_config default + model config overrides
|
|
394
|
-
(
|
|
395
|
-
{"parameter": 0},
|
|
396
|
-
{
|
|
397
|
-
"parameter": {
|
|
398
|
-
"type": "int",
|
|
399
|
-
"title": "Lambda",
|
|
400
|
-
"default": 1,
|
|
401
|
-
"required": False,
|
|
402
|
-
}
|
|
403
|
-
},
|
|
404
|
-
{},
|
|
405
|
-
{"parameter": 2},
|
|
406
|
-
{"parameter": 2},
|
|
407
|
-
),
|
|
408
|
-
# Keep parameters from worker version configuration + model config + frontend config overrides
|
|
409
|
-
({"parameter": 0}, {}, {"parameter": 2}, {"parameter": 1}, {"parameter": 2}),
|
|
410
|
-
# Keep parameters from worker version configuration + user_config default + model config + frontend config overrides all
|
|
411
|
-
(
|
|
412
|
-
{"parameter": 0},
|
|
413
|
-
{
|
|
414
|
-
"parameter": {
|
|
415
|
-
"type": "int",
|
|
416
|
-
"title": "Lambda",
|
|
417
|
-
"default": 1,
|
|
418
|
-
"required": False,
|
|
419
|
-
}
|
|
420
|
-
},
|
|
421
|
-
{"parameter": 3},
|
|
422
|
-
{"parameter": 2},
|
|
423
|
-
{"parameter": 3},
|
|
424
|
-
),
|
|
425
|
-
],
|
|
426
|
-
)
|
|
427
|
-
def test_worker_config_multiple_source(
|
|
428
|
-
monkeypatch,
|
|
429
|
-
responses,
|
|
430
|
-
wk_version_config,
|
|
431
|
-
wk_version_user_config,
|
|
432
|
-
frontend_user_config,
|
|
433
|
-
model_config,
|
|
434
|
-
expected_config,
|
|
435
|
-
):
|
|
436
|
-
# Compute WorkerRun info
|
|
437
|
-
payload = {
|
|
438
|
-
"id": "56785678-5678-5678-5678-567856785678",
|
|
439
|
-
"parents": [],
|
|
440
|
-
"worker_version": {
|
|
441
|
-
"id": "12341234-1234-1234-1234-123412341234",
|
|
442
|
-
"configuration": {
|
|
443
|
-
"docker": {"image": "python:3"},
|
|
444
|
-
"configuration": wk_version_config,
|
|
445
|
-
"secrets": [],
|
|
446
|
-
"user_configuration": wk_version_user_config,
|
|
447
|
-
},
|
|
448
|
-
"revision": {
|
|
449
|
-
"hash": "deadbeef1234",
|
|
450
|
-
"name": "some git revision",
|
|
451
|
-
},
|
|
452
|
-
"docker_image": "python:3",
|
|
453
|
-
"docker_image_name": "python:3",
|
|
454
|
-
"state": "created",
|
|
455
|
-
"worker": {
|
|
456
|
-
"id": "deadbeef-1234-5678-1234-worker",
|
|
457
|
-
"name": "Fake worker",
|
|
458
|
-
"slug": "fake_worker",
|
|
459
|
-
"type": "classifier",
|
|
460
|
-
},
|
|
461
|
-
},
|
|
462
|
-
"configuration": {
|
|
463
|
-
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
464
|
-
"name": "Configuration entered by user",
|
|
465
|
-
"configuration": frontend_user_config,
|
|
466
|
-
},
|
|
467
|
-
"model_version": {
|
|
468
|
-
"id": "12341234-1234-1234-1234-123412341234",
|
|
469
|
-
"name": "Model version 1337",
|
|
470
|
-
"configuration": model_config,
|
|
471
|
-
"model": {
|
|
472
|
-
"id": "hahahaha-haha-haha-haha-hahahahahaha",
|
|
473
|
-
"name": "My model",
|
|
474
|
-
},
|
|
475
|
-
},
|
|
476
|
-
"process": {
|
|
477
|
-
"name": None,
|
|
478
|
-
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
479
|
-
"state": "running",
|
|
480
|
-
"mode": "workers",
|
|
481
|
-
"corpus": "11111111-1111-1111-1111-111111111111",
|
|
482
|
-
"use_cache": False,
|
|
483
|
-
"activity_state": "ready",
|
|
484
|
-
"model_id": None,
|
|
485
|
-
"train_folder_id": None,
|
|
486
|
-
"validation_folder_id": None,
|
|
487
|
-
"test_folder_id": None,
|
|
488
|
-
},
|
|
489
|
-
"summary": "Worker Fake worker @ 123412",
|
|
490
|
-
}
|
|
491
|
-
|
|
763
|
+
@pytest.mark.usefixtures("_mock_worker_run_api")
|
|
764
|
+
def test_update_activity(responses, mock_elements_worker):
|
|
765
|
+
"""Test an update call with feature enabled triggers an API call"""
|
|
492
766
|
responses.add(
|
|
493
|
-
responses.
|
|
494
|
-
"http://testserver/api/v1/
|
|
767
|
+
responses.PUT,
|
|
768
|
+
"http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
|
|
495
769
|
status=200,
|
|
496
|
-
|
|
497
|
-
|
|
770
|
+
json={
|
|
771
|
+
"element_id": "1234-deadbeef",
|
|
772
|
+
"process_id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
773
|
+
"state": "processed",
|
|
774
|
+
},
|
|
498
775
|
)
|
|
499
776
|
|
|
500
|
-
|
|
501
|
-
monkeypatch.setattr(sys, "argv", ["worker"])
|
|
502
|
-
worker = ElementsWorker()
|
|
503
|
-
worker.configure()
|
|
777
|
+
out = mock_elements_worker.update_activity("1234-deadbeef", ActivityState.Processed)
|
|
504
778
|
|
|
505
|
-
#
|
|
506
|
-
|
|
507
|
-
worker.config.update(worker.model_configuration)
|
|
779
|
+
# Check the response received by worker
|
|
780
|
+
assert out is True
|
|
508
781
|
|
|
509
|
-
|
|
510
|
-
|
|
782
|
+
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
783
|
+
assert [
|
|
784
|
+
(call.request.method, call.request.url) for call in responses.calls
|
|
785
|
+
] == BASE_API_CALLS + [
|
|
786
|
+
(
|
|
787
|
+
"PUT",
|
|
788
|
+
"http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
|
|
789
|
+
),
|
|
790
|
+
]
|
|
511
791
|
|
|
512
|
-
# Check
|
|
513
|
-
assert
|
|
792
|
+
# Check the request sent by worker
|
|
793
|
+
assert json.loads(responses.calls[-1].request.body) == {
|
|
794
|
+
"element_id": "1234-deadbeef",
|
|
795
|
+
"process_id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
796
|
+
"state": "processed",
|
|
797
|
+
}
|