arkindex-base-worker 0.4.0b3__py3-none-any.whl → 0.4.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.4.0b3.dist-info → arkindex_base_worker-0.4.0rc2.dist-info}/METADATA +4 -3
- {arkindex_base_worker-0.4.0b3.dist-info → arkindex_base_worker-0.4.0rc2.dist-info}/RECORD +21 -20
- {arkindex_base_worker-0.4.0b3.dist-info → arkindex_base_worker-0.4.0rc2.dist-info}/WHEEL +1 -1
- arkindex_worker/image.py +118 -0
- arkindex_worker/worker/__init__.py +26 -158
- arkindex_worker/worker/base.py +32 -1
- arkindex_worker/worker/dataset.py +70 -0
- arkindex_worker/worker/element.py +260 -75
- arkindex_worker/worker/process.py +63 -0
- arkindex_worker/worker/transcription.py +50 -50
- tests/__init__.py +1 -1
- tests/conftest.py +11 -23
- tests/test_base_worker.py +203 -2
- tests/test_dataset_worker.py +5 -2
- tests/test_elements_worker/test_elements.py +712 -18
- tests/test_elements_worker/test_worker.py +0 -200
- tests/test_image.py +248 -6
- tests/test_merge.py +0 -1
- tests/test_utils.py +2 -4
- {arkindex_base_worker-0.4.0b3.dist-info → arkindex_base_worker-0.4.0rc2.dist-info}/LICENSE +0 -0
- {arkindex_base_worker-0.4.0b3.dist-info → arkindex_base_worker-0.4.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -17,7 +17,9 @@ from arkindex_worker.cache import (
|
|
|
17
17
|
from arkindex_worker.models import Element
|
|
18
18
|
from arkindex_worker.utils import DEFAULT_BATCH_SIZE
|
|
19
19
|
from arkindex_worker.worker import ElementsWorker
|
|
20
|
+
from arkindex_worker.worker.dataset import DatasetState
|
|
20
21
|
from arkindex_worker.worker.element import MissingTypeError
|
|
22
|
+
from arkindex_worker.worker.process import ProcessMode
|
|
21
23
|
from tests import CORPUS_ID
|
|
22
24
|
|
|
23
25
|
from . import BASE_API_CALLS
|
|
@@ -109,7 +111,7 @@ def test_create_missing_types(responses, mock_elements_worker):
|
|
|
109
111
|
)
|
|
110
112
|
|
|
111
113
|
|
|
112
|
-
def test_list_elements_elements_list_arg_wrong_type(
|
|
114
|
+
def test_get_elements_elements_list_arg_wrong_type(
|
|
113
115
|
monkeypatch, tmp_path, mock_elements_worker
|
|
114
116
|
):
|
|
115
117
|
elements_path = tmp_path / "elements.json"
|
|
@@ -120,10 +122,10 @@ def test_list_elements_elements_list_arg_wrong_type(
|
|
|
120
122
|
worker.configure()
|
|
121
123
|
|
|
122
124
|
with pytest.raises(AssertionError, match="Elements list must be a list"):
|
|
123
|
-
worker.
|
|
125
|
+
worker.get_elements()
|
|
124
126
|
|
|
125
127
|
|
|
126
|
-
def test_list_elements_elements_list_arg_empty_list(
|
|
128
|
+
def test_get_elements_elements_list_arg_empty_list(
|
|
127
129
|
monkeypatch, tmp_path, mock_elements_worker
|
|
128
130
|
):
|
|
129
131
|
elements_path = tmp_path / "elements.json"
|
|
@@ -134,10 +136,10 @@ def test_list_elements_elements_list_arg_empty_list(
|
|
|
134
136
|
worker.configure()
|
|
135
137
|
|
|
136
138
|
with pytest.raises(AssertionError, match="No elements in elements list"):
|
|
137
|
-
worker.
|
|
139
|
+
worker.get_elements()
|
|
138
140
|
|
|
139
141
|
|
|
140
|
-
def test_list_elements_elements_list_arg_missing_id(
|
|
142
|
+
def test_get_elements_elements_list_arg_missing_id(
|
|
141
143
|
monkeypatch, tmp_path, mock_elements_worker
|
|
142
144
|
):
|
|
143
145
|
elements_path = tmp_path / "elements.json"
|
|
@@ -147,12 +149,12 @@ def test_list_elements_elements_list_arg_missing_id(
|
|
|
147
149
|
worker = ElementsWorker()
|
|
148
150
|
worker.configure()
|
|
149
151
|
|
|
150
|
-
elt_list = worker.
|
|
152
|
+
elt_list = worker.get_elements()
|
|
151
153
|
|
|
152
154
|
assert elt_list == []
|
|
153
155
|
|
|
154
156
|
|
|
155
|
-
def test_list_elements_elements_list_arg_not_uuid(
|
|
157
|
+
def test_get_elements_elements_list_arg_not_uuid(
|
|
156
158
|
monkeypatch, tmp_path, mock_elements_worker
|
|
157
159
|
):
|
|
158
160
|
elements_path = tmp_path / "elements.json"
|
|
@@ -175,10 +177,10 @@ def test_list_elements_elements_list_arg_not_uuid(
|
|
|
175
177
|
Exception,
|
|
176
178
|
match="These element IDs are invalid: volumeid, pageid, actid, surfaceid",
|
|
177
179
|
):
|
|
178
|
-
worker.
|
|
180
|
+
worker.get_elements()
|
|
179
181
|
|
|
180
182
|
|
|
181
|
-
def test_list_elements_elements_list_arg(monkeypatch, tmp_path, mock_elements_worker):
|
|
183
|
+
def test_get_elements_elements_list_arg(monkeypatch, tmp_path, mock_elements_worker):
|
|
182
184
|
elements_path = tmp_path / "elements.json"
|
|
183
185
|
elements_path.write_text(
|
|
184
186
|
json.dumps(
|
|
@@ -194,7 +196,7 @@ def test_list_elements_elements_list_arg(monkeypatch, tmp_path, mock_elements_wo
|
|
|
194
196
|
worker = ElementsWorker()
|
|
195
197
|
worker.configure()
|
|
196
198
|
|
|
197
|
-
elt_list = worker.
|
|
199
|
+
elt_list = worker.get_elements()
|
|
198
200
|
|
|
199
201
|
assert elt_list == [
|
|
200
202
|
"11111111-1111-1111-1111-111111111111",
|
|
@@ -203,15 +205,17 @@ def test_list_elements_elements_list_arg(monkeypatch, tmp_path, mock_elements_wo
|
|
|
203
205
|
]
|
|
204
206
|
|
|
205
207
|
|
|
206
|
-
def test_list_elements_element_arg_not_uuid(mocker, mock_elements_worker):
|
|
208
|
+
def test_get_elements_element_arg_not_uuid(mocker, mock_elements_worker):
|
|
207
209
|
mocker.patch(
|
|
208
210
|
"arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
|
|
209
211
|
return_value=Namespace(
|
|
210
212
|
element=["volumeid", "pageid"],
|
|
213
|
+
config={},
|
|
211
214
|
verbose=False,
|
|
212
215
|
elements_list=None,
|
|
213
216
|
database=None,
|
|
214
|
-
dev=
|
|
217
|
+
dev=True,
|
|
218
|
+
set=[],
|
|
215
219
|
),
|
|
216
220
|
)
|
|
217
221
|
|
|
@@ -221,10 +225,10 @@ def test_list_elements_element_arg_not_uuid(mocker, mock_elements_worker):
|
|
|
221
225
|
with pytest.raises(
|
|
222
226
|
Exception, match="These element IDs are invalid: volumeid, pageid"
|
|
223
227
|
):
|
|
224
|
-
worker.
|
|
228
|
+
worker.get_elements()
|
|
225
229
|
|
|
226
230
|
|
|
227
|
-
def test_list_elements_element_arg(mocker, mock_elements_worker):
|
|
231
|
+
def test_get_elements_element_arg(mocker, mock_elements_worker):
|
|
228
232
|
mocker.patch(
|
|
229
233
|
"arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
|
|
230
234
|
return_value=Namespace(
|
|
@@ -232,17 +236,19 @@ def test_list_elements_element_arg(mocker, mock_elements_worker):
|
|
|
232
236
|
"11111111-1111-1111-1111-111111111111",
|
|
233
237
|
"22222222-2222-2222-2222-222222222222",
|
|
234
238
|
],
|
|
239
|
+
config={},
|
|
235
240
|
verbose=False,
|
|
236
241
|
elements_list=None,
|
|
237
242
|
database=None,
|
|
238
|
-
dev=
|
|
243
|
+
dev=True,
|
|
244
|
+
set=[],
|
|
239
245
|
),
|
|
240
246
|
)
|
|
241
247
|
|
|
242
248
|
worker = ElementsWorker()
|
|
243
249
|
worker.configure()
|
|
244
250
|
|
|
245
|
-
elt_list = worker.
|
|
251
|
+
elt_list = worker.get_elements()
|
|
246
252
|
|
|
247
253
|
assert elt_list == [
|
|
248
254
|
"11111111-1111-1111-1111-111111111111",
|
|
@@ -250,7 +256,265 @@ def test_list_elements_element_arg(mocker, mock_elements_worker):
|
|
|
250
256
|
]
|
|
251
257
|
|
|
252
258
|
|
|
253
|
-
def test_list_elements_both_args_error(mocker, mock_elements_worker, tmp_path):
|
|
259
|
+
def test_get_elements_dataset_set_arg(responses, mocker, mock_elements_worker):
|
|
260
|
+
mocker.patch(
|
|
261
|
+
"arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
|
|
262
|
+
return_value=Namespace(
|
|
263
|
+
element=[],
|
|
264
|
+
config={},
|
|
265
|
+
verbose=False,
|
|
266
|
+
elements_list=None,
|
|
267
|
+
database=None,
|
|
268
|
+
dev=True,
|
|
269
|
+
set=[(UUID("11111111-1111-1111-1111-111111111111"), "train")],
|
|
270
|
+
),
|
|
271
|
+
)
|
|
272
|
+
|
|
273
|
+
# Mock RetrieveDataset call
|
|
274
|
+
responses.add(
|
|
275
|
+
responses.GET,
|
|
276
|
+
"http://testserver/api/v1/datasets/11111111-1111-1111-1111-111111111111/",
|
|
277
|
+
status=200,
|
|
278
|
+
json={
|
|
279
|
+
"id": "11111111-1111-1111-1111-111111111111",
|
|
280
|
+
"name": "My dataset",
|
|
281
|
+
"description": "A dataset about cats.",
|
|
282
|
+
"sets": ["train", "dev", "test"],
|
|
283
|
+
"state": DatasetState.Complete.value,
|
|
284
|
+
},
|
|
285
|
+
content_type="application/json",
|
|
286
|
+
)
|
|
287
|
+
|
|
288
|
+
# Mock ListSetElements call
|
|
289
|
+
element = {
|
|
290
|
+
"id": "22222222-2222-2222-2222-222222222222",
|
|
291
|
+
"type": "page",
|
|
292
|
+
"name": "1",
|
|
293
|
+
"corpus": {
|
|
294
|
+
"id": "11111111-1111-1111-1111-111111111111",
|
|
295
|
+
},
|
|
296
|
+
"thumbnail_url": "http://example.com",
|
|
297
|
+
"zone": {
|
|
298
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
299
|
+
"polygon": [[0, 0], [0, 0], [0, 0]],
|
|
300
|
+
"image": {
|
|
301
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
302
|
+
"path": "string",
|
|
303
|
+
"width": 0,
|
|
304
|
+
"height": 0,
|
|
305
|
+
"url": "http://example.com",
|
|
306
|
+
"s3_url": "string",
|
|
307
|
+
"status": "checked",
|
|
308
|
+
"server": {
|
|
309
|
+
"display_name": "string",
|
|
310
|
+
"url": "http://example.com",
|
|
311
|
+
"max_width": 2147483647,
|
|
312
|
+
"max_height": 2147483647,
|
|
313
|
+
},
|
|
314
|
+
},
|
|
315
|
+
"url": "http://example.com",
|
|
316
|
+
},
|
|
317
|
+
"rotation_angle": 0,
|
|
318
|
+
"mirrored": False,
|
|
319
|
+
"created": "2019-08-24T14:15:22Z",
|
|
320
|
+
"classes": [
|
|
321
|
+
{
|
|
322
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
323
|
+
"ml_class": {
|
|
324
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
325
|
+
"name": "string",
|
|
326
|
+
},
|
|
327
|
+
"state": "pending",
|
|
328
|
+
"confidence": 0,
|
|
329
|
+
"high_confidence": True,
|
|
330
|
+
"worker_run": {
|
|
331
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
332
|
+
"summary": "string",
|
|
333
|
+
},
|
|
334
|
+
}
|
|
335
|
+
],
|
|
336
|
+
"metadata": [
|
|
337
|
+
{
|
|
338
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
339
|
+
"type": "text",
|
|
340
|
+
"name": "string",
|
|
341
|
+
"value": "string",
|
|
342
|
+
"dates": [{"type": "exact", "year": 0, "month": 1, "day": 1}],
|
|
343
|
+
}
|
|
344
|
+
],
|
|
345
|
+
"transcriptions": [
|
|
346
|
+
{
|
|
347
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
348
|
+
"text": "string",
|
|
349
|
+
"confidence": 0,
|
|
350
|
+
"orientation": "horizontal-lr",
|
|
351
|
+
"worker_run": {
|
|
352
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
353
|
+
"summary": "string",
|
|
354
|
+
},
|
|
355
|
+
}
|
|
356
|
+
],
|
|
357
|
+
"has_children": True,
|
|
358
|
+
"worker_run": {
|
|
359
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
360
|
+
"summary": "string",
|
|
361
|
+
},
|
|
362
|
+
"confidence": 1,
|
|
363
|
+
}
|
|
364
|
+
responses.add(
|
|
365
|
+
responses.GET,
|
|
366
|
+
"http://testserver/api/v1/datasets/11111111-1111-1111-1111-111111111111/elements/?set=train&with_count=true",
|
|
367
|
+
status=200,
|
|
368
|
+
json={
|
|
369
|
+
"next": None,
|
|
370
|
+
"previous": None,
|
|
371
|
+
"results": [
|
|
372
|
+
{
|
|
373
|
+
"set": "train",
|
|
374
|
+
"element": element,
|
|
375
|
+
}
|
|
376
|
+
],
|
|
377
|
+
"count": 1,
|
|
378
|
+
},
|
|
379
|
+
content_type="application/json",
|
|
380
|
+
)
|
|
381
|
+
|
|
382
|
+
worker = ElementsWorker()
|
|
383
|
+
worker.configure()
|
|
384
|
+
|
|
385
|
+
elt_list = worker.get_elements()
|
|
386
|
+
|
|
387
|
+
assert elt_list == [
|
|
388
|
+
Element(**element),
|
|
389
|
+
]
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def test_get_elements_dataset_set_api(responses, mocker, mock_elements_worker):
|
|
393
|
+
# Mock ListProcessSets call
|
|
394
|
+
responses.add(
|
|
395
|
+
responses.GET,
|
|
396
|
+
"http://testserver/api/v1/process/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff/sets/",
|
|
397
|
+
status=200,
|
|
398
|
+
json={
|
|
399
|
+
"next": None,
|
|
400
|
+
"previous": None,
|
|
401
|
+
"results": [
|
|
402
|
+
{
|
|
403
|
+
"id": "33333333-3333-3333-3333-333333333333",
|
|
404
|
+
"dataset": {"id": "11111111-1111-1111-1111-111111111111"},
|
|
405
|
+
"set_name": "train",
|
|
406
|
+
}
|
|
407
|
+
],
|
|
408
|
+
"count": 1,
|
|
409
|
+
},
|
|
410
|
+
content_type="application/json",
|
|
411
|
+
)
|
|
412
|
+
|
|
413
|
+
# Mock ListSetElements call
|
|
414
|
+
element = {
|
|
415
|
+
"id": "22222222-2222-2222-2222-222222222222",
|
|
416
|
+
"type": "page",
|
|
417
|
+
"name": "1",
|
|
418
|
+
"corpus": {
|
|
419
|
+
"id": "11111111-1111-1111-1111-111111111111",
|
|
420
|
+
},
|
|
421
|
+
"thumbnail_url": "http://example.com",
|
|
422
|
+
"zone": {
|
|
423
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
424
|
+
"polygon": [[0, 0], [0, 0], [0, 0]],
|
|
425
|
+
"image": {
|
|
426
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
427
|
+
"path": "string",
|
|
428
|
+
"width": 0,
|
|
429
|
+
"height": 0,
|
|
430
|
+
"url": "http://example.com",
|
|
431
|
+
"s3_url": "string",
|
|
432
|
+
"status": "checked",
|
|
433
|
+
"server": {
|
|
434
|
+
"display_name": "string",
|
|
435
|
+
"url": "http://example.com",
|
|
436
|
+
"max_width": 2147483647,
|
|
437
|
+
"max_height": 2147483647,
|
|
438
|
+
},
|
|
439
|
+
},
|
|
440
|
+
"url": "http://example.com",
|
|
441
|
+
},
|
|
442
|
+
"rotation_angle": 0,
|
|
443
|
+
"mirrored": False,
|
|
444
|
+
"created": "2019-08-24T14:15:22Z",
|
|
445
|
+
"classes": [
|
|
446
|
+
{
|
|
447
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
448
|
+
"ml_class": {
|
|
449
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
450
|
+
"name": "string",
|
|
451
|
+
},
|
|
452
|
+
"state": "pending",
|
|
453
|
+
"confidence": 0,
|
|
454
|
+
"high_confidence": True,
|
|
455
|
+
"worker_run": {
|
|
456
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
457
|
+
"summary": "string",
|
|
458
|
+
},
|
|
459
|
+
}
|
|
460
|
+
],
|
|
461
|
+
"metadata": [
|
|
462
|
+
{
|
|
463
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
464
|
+
"type": "text",
|
|
465
|
+
"name": "string",
|
|
466
|
+
"value": "string",
|
|
467
|
+
"dates": [{"type": "exact", "year": 0, "month": 1, "day": 1}],
|
|
468
|
+
}
|
|
469
|
+
],
|
|
470
|
+
"transcriptions": [
|
|
471
|
+
{
|
|
472
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
473
|
+
"text": "string",
|
|
474
|
+
"confidence": 0,
|
|
475
|
+
"orientation": "horizontal-lr",
|
|
476
|
+
"worker_run": {
|
|
477
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
478
|
+
"summary": "string",
|
|
479
|
+
},
|
|
480
|
+
}
|
|
481
|
+
],
|
|
482
|
+
"has_children": True,
|
|
483
|
+
"worker_run": {
|
|
484
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
485
|
+
"summary": "string",
|
|
486
|
+
},
|
|
487
|
+
"confidence": 1,
|
|
488
|
+
}
|
|
489
|
+
responses.add(
|
|
490
|
+
responses.GET,
|
|
491
|
+
"http://testserver/api/v1/datasets/11111111-1111-1111-1111-111111111111/elements/?set=train&with_count=true",
|
|
492
|
+
status=200,
|
|
493
|
+
json={
|
|
494
|
+
"next": None,
|
|
495
|
+
"previous": None,
|
|
496
|
+
"results": [
|
|
497
|
+
{
|
|
498
|
+
"set": "train",
|
|
499
|
+
"element": element,
|
|
500
|
+
}
|
|
501
|
+
],
|
|
502
|
+
"count": 1,
|
|
503
|
+
},
|
|
504
|
+
content_type="application/json",
|
|
505
|
+
)
|
|
506
|
+
|
|
507
|
+
# Update ProcessMode to Dataset
|
|
508
|
+
mock_elements_worker.process_information["mode"] = ProcessMode.Dataset
|
|
509
|
+
|
|
510
|
+
elt_list = mock_elements_worker.get_elements()
|
|
511
|
+
|
|
512
|
+
assert elt_list == [
|
|
513
|
+
Element(**element),
|
|
514
|
+
]
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def test_get_elements_both_args_error(mocker, mock_elements_worker, tmp_path):
|
|
254
518
|
elements_path = tmp_path / "elements.json"
|
|
255
519
|
elements_path.write_text(
|
|
256
520
|
json.dumps(
|
|
@@ -270,6 +534,7 @@ def test_list_elements_both_args_error(mocker, mock_elements_worker, tmp_path):
|
|
|
270
534
|
elements_list=elements_path.open(),
|
|
271
535
|
database=None,
|
|
272
536
|
dev=False,
|
|
537
|
+
set=[],
|
|
273
538
|
),
|
|
274
539
|
)
|
|
275
540
|
|
|
@@ -279,7 +544,7 @@ def test_list_elements_both_args_error(mocker, mock_elements_worker, tmp_path):
|
|
|
279
544
|
with pytest.raises(
|
|
280
545
|
AssertionError, match="elements-list and element CLI args shouldn't be both set"
|
|
281
546
|
):
|
|
282
|
-
worker.
|
|
547
|
+
worker.get_elements()
|
|
283
548
|
|
|
284
549
|
|
|
285
550
|
def test_database_arg(mocker, mock_elements_worker, tmp_path):
|
|
@@ -295,6 +560,7 @@ def test_database_arg(mocker, mock_elements_worker, tmp_path):
|
|
|
295
560
|
elements_list=None,
|
|
296
561
|
database=database_path,
|
|
297
562
|
dev=False,
|
|
563
|
+
set=[],
|
|
298
564
|
),
|
|
299
565
|
)
|
|
300
566
|
|
|
@@ -319,6 +585,7 @@ def test_database_arg_cache_missing_version_table(
|
|
|
319
585
|
elements_list=None,
|
|
320
586
|
database=database_path,
|
|
321
587
|
dev=False,
|
|
588
|
+
set=[],
|
|
322
589
|
),
|
|
323
590
|
)
|
|
324
591
|
|
|
@@ -1958,6 +2225,433 @@ def test_partial_update_element_confidence(
|
|
|
1958
2225
|
assert cached_element.confidence == confidence
|
|
1959
2226
|
|
|
1960
2227
|
|
|
2228
|
+
def test_list_elements_wrong_folder(mock_elements_worker):
|
|
2229
|
+
with pytest.raises(AssertionError, match="folder should be of type bool"):
|
|
2230
|
+
mock_elements_worker.list_elements(folder="not bool")
|
|
2231
|
+
|
|
2232
|
+
|
|
2233
|
+
def test_list_elements_wrong_name(mock_elements_worker):
|
|
2234
|
+
with pytest.raises(AssertionError, match="name should be of type str"):
|
|
2235
|
+
mock_elements_worker.list_elements(name=1234)
|
|
2236
|
+
|
|
2237
|
+
|
|
2238
|
+
def test_list_elements_wrong_top_level(mock_elements_worker):
|
|
2239
|
+
with pytest.raises(AssertionError, match="top_level should be of type bool"):
|
|
2240
|
+
mock_elements_worker.list_elements(top_level="not bool")
|
|
2241
|
+
|
|
2242
|
+
|
|
2243
|
+
def test_list_elements_wrong_type(mock_elements_worker):
|
|
2244
|
+
with pytest.raises(AssertionError, match="type should be of type str"):
|
|
2245
|
+
mock_elements_worker.list_elements(type=1234)
|
|
2246
|
+
|
|
2247
|
+
|
|
2248
|
+
def test_list_elements_wrong_with_classes(mock_elements_worker):
|
|
2249
|
+
with pytest.raises(AssertionError, match="with_classes should be of type bool"):
|
|
2250
|
+
mock_elements_worker.list_elements(with_classes="not bool")
|
|
2251
|
+
|
|
2252
|
+
|
|
2253
|
+
def test_list_elements_wrong_with_corpus(mock_elements_worker):
|
|
2254
|
+
with pytest.raises(AssertionError, match="with_corpus should be of type bool"):
|
|
2255
|
+
mock_elements_worker.list_elements(with_corpus="not bool")
|
|
2256
|
+
|
|
2257
|
+
|
|
2258
|
+
def test_list_elements_wrong_with_has_children(mock_elements_worker):
|
|
2259
|
+
with pytest.raises(
|
|
2260
|
+
AssertionError, match="with_has_children should be of type bool"
|
|
2261
|
+
):
|
|
2262
|
+
mock_elements_worker.list_elements(with_has_children="not bool")
|
|
2263
|
+
|
|
2264
|
+
|
|
2265
|
+
def test_list_elements_wrong_with_zone(mock_elements_worker):
|
|
2266
|
+
with pytest.raises(AssertionError, match="with_zone should be of type bool"):
|
|
2267
|
+
mock_elements_worker.list_elements(with_zone="not bool")
|
|
2268
|
+
|
|
2269
|
+
|
|
2270
|
+
def test_list_elements_wrong_with_metadata(mock_elements_worker):
|
|
2271
|
+
with pytest.raises(AssertionError, match="with_metadata should be of type bool"):
|
|
2272
|
+
mock_elements_worker.list_elements(with_metadata="not bool")
|
|
2273
|
+
|
|
2274
|
+
|
|
2275
|
+
@pytest.mark.parametrize(
|
|
2276
|
+
("param", "value"),
|
|
2277
|
+
[
|
|
2278
|
+
("worker_run", 1234),
|
|
2279
|
+
("transcription_worker_run", 1234),
|
|
2280
|
+
],
|
|
2281
|
+
)
|
|
2282
|
+
def test_list_elements_wrong_worker_run(mock_elements_worker, param, value):
|
|
2283
|
+
with pytest.raises(AssertionError, match=f"{param} should be of type str or bool"):
|
|
2284
|
+
mock_elements_worker.list_elements(**{param: value})
|
|
2285
|
+
|
|
2286
|
+
|
|
2287
|
+
@pytest.mark.parametrize(
|
|
2288
|
+
("param", "alternative", "value"),
|
|
2289
|
+
[
|
|
2290
|
+
("worker_version", "worker_run", 1234),
|
|
2291
|
+
("transcription_worker_version", "transcription_worker_run", 1234),
|
|
2292
|
+
],
|
|
2293
|
+
)
|
|
2294
|
+
def test_list_elements_wrong_worker_version(
|
|
2295
|
+
mock_elements_worker, param, alternative, value
|
|
2296
|
+
):
|
|
2297
|
+
# WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
|
|
2298
|
+
with (
|
|
2299
|
+
pytest.deprecated_call(
|
|
2300
|
+
match=f"`{param}` usage is deprecated. Consider using `{alternative}` instead."
|
|
2301
|
+
),
|
|
2302
|
+
pytest.raises(AssertionError, match=f"{param} should be of type str or bool"),
|
|
2303
|
+
):
|
|
2304
|
+
mock_elements_worker.list_elements(**{param: value})
|
|
2305
|
+
|
|
2306
|
+
|
|
2307
|
+
@pytest.mark.parametrize(
|
|
2308
|
+
"param",
|
|
2309
|
+
[
|
|
2310
|
+
"worker_run",
|
|
2311
|
+
"transcription_worker_run",
|
|
2312
|
+
],
|
|
2313
|
+
)
|
|
2314
|
+
def test_list_elements_wrong_bool_worker_run(mock_elements_worker, param):
|
|
2315
|
+
with pytest.raises(
|
|
2316
|
+
AssertionError, match=f"if of type bool, {param} can only be set to False"
|
|
2317
|
+
):
|
|
2318
|
+
mock_elements_worker.list_elements(**{param: True})
|
|
2319
|
+
|
|
2320
|
+
|
|
2321
|
+
@pytest.mark.parametrize(
|
|
2322
|
+
("param", "alternative"),
|
|
2323
|
+
[
|
|
2324
|
+
("worker_version", "worker_run"),
|
|
2325
|
+
("transcription_worker_version", "transcription_worker_run"),
|
|
2326
|
+
],
|
|
2327
|
+
)
|
|
2328
|
+
def test_list_elements_wrong_bool_worker_version(
|
|
2329
|
+
mock_elements_worker, param, alternative
|
|
2330
|
+
):
|
|
2331
|
+
# WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
|
|
2332
|
+
with (
|
|
2333
|
+
pytest.deprecated_call(
|
|
2334
|
+
match=f"`{param}` usage is deprecated. Consider using `{alternative}` instead."
|
|
2335
|
+
),
|
|
2336
|
+
pytest.raises(
|
|
2337
|
+
AssertionError, match=f"if of type bool, {param} can only be set to False"
|
|
2338
|
+
),
|
|
2339
|
+
):
|
|
2340
|
+
mock_elements_worker.list_elements(**{param: True})
|
|
2341
|
+
|
|
2342
|
+
|
|
2343
|
+
def test_list_elements_api_error(responses, mock_elements_worker):
|
|
2344
|
+
responses.add(
|
|
2345
|
+
responses.GET,
|
|
2346
|
+
f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/",
|
|
2347
|
+
status=418,
|
|
2348
|
+
)
|
|
2349
|
+
|
|
2350
|
+
with pytest.raises(
|
|
2351
|
+
Exception, match="Stopping pagination as data will be incomplete"
|
|
2352
|
+
):
|
|
2353
|
+
next(mock_elements_worker.list_elements())
|
|
2354
|
+
|
|
2355
|
+
assert len(responses.calls) == len(BASE_API_CALLS) + 5
|
|
2356
|
+
assert [
|
|
2357
|
+
(call.request.method, call.request.url) for call in responses.calls
|
|
2358
|
+
] == BASE_API_CALLS + [
|
|
2359
|
+
# We do 5 retries
|
|
2360
|
+
(
|
|
2361
|
+
"GET",
|
|
2362
|
+
f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/",
|
|
2363
|
+
),
|
|
2364
|
+
(
|
|
2365
|
+
"GET",
|
|
2366
|
+
f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/",
|
|
2367
|
+
),
|
|
2368
|
+
(
|
|
2369
|
+
"GET",
|
|
2370
|
+
f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/",
|
|
2371
|
+
),
|
|
2372
|
+
(
|
|
2373
|
+
"GET",
|
|
2374
|
+
f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/",
|
|
2375
|
+
),
|
|
2376
|
+
(
|
|
2377
|
+
"GET",
|
|
2378
|
+
f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/",
|
|
2379
|
+
),
|
|
2380
|
+
]
|
|
2381
|
+
|
|
2382
|
+
|
|
2383
|
+
def test_list_elements(responses, mock_elements_worker):
|
|
2384
|
+
expected_children = [
|
|
2385
|
+
{
|
|
2386
|
+
"id": "0000",
|
|
2387
|
+
"type": "page",
|
|
2388
|
+
"name": "Test",
|
|
2389
|
+
"corpus": {},
|
|
2390
|
+
"thumbnail_url": None,
|
|
2391
|
+
"zone": {},
|
|
2392
|
+
"best_classes": None,
|
|
2393
|
+
"has_children": None,
|
|
2394
|
+
"worker_version_id": None,
|
|
2395
|
+
"worker_run_id": None,
|
|
2396
|
+
},
|
|
2397
|
+
{
|
|
2398
|
+
"id": "1111",
|
|
2399
|
+
"type": "page",
|
|
2400
|
+
"name": "Test 2",
|
|
2401
|
+
"corpus": {},
|
|
2402
|
+
"thumbnail_url": None,
|
|
2403
|
+
"zone": {},
|
|
2404
|
+
"best_classes": None,
|
|
2405
|
+
"has_children": None,
|
|
2406
|
+
"worker_version_id": None,
|
|
2407
|
+
"worker_run_id": None,
|
|
2408
|
+
},
|
|
2409
|
+
{
|
|
2410
|
+
"id": "2222",
|
|
2411
|
+
"type": "page",
|
|
2412
|
+
"name": "Test 3",
|
|
2413
|
+
"corpus": {},
|
|
2414
|
+
"thumbnail_url": None,
|
|
2415
|
+
"zone": {},
|
|
2416
|
+
"best_classes": None,
|
|
2417
|
+
"has_children": None,
|
|
2418
|
+
"worker_version_id": None,
|
|
2419
|
+
"worker_run_id": None,
|
|
2420
|
+
},
|
|
2421
|
+
]
|
|
2422
|
+
responses.add(
|
|
2423
|
+
responses.GET,
|
|
2424
|
+
f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/",
|
|
2425
|
+
status=200,
|
|
2426
|
+
json={
|
|
2427
|
+
"count": 3,
|
|
2428
|
+
"next": None,
|
|
2429
|
+
"results": expected_children,
|
|
2430
|
+
},
|
|
2431
|
+
)
|
|
2432
|
+
|
|
2433
|
+
for idx, child in enumerate(mock_elements_worker.list_elements()):
|
|
2434
|
+
assert child == expected_children[idx]
|
|
2435
|
+
|
|
2436
|
+
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
2437
|
+
assert [
|
|
2438
|
+
(call.request.method, call.request.url) for call in responses.calls
|
|
2439
|
+
] == BASE_API_CALLS + [
|
|
2440
|
+
(
|
|
2441
|
+
"GET",
|
|
2442
|
+
f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/",
|
|
2443
|
+
),
|
|
2444
|
+
]
|
|
2445
|
+
|
|
2446
|
+
|
|
2447
|
+
def test_list_elements_manual_worker_version(responses, mock_elements_worker):
|
|
2448
|
+
expected_children = [
|
|
2449
|
+
{
|
|
2450
|
+
"id": "0000",
|
|
2451
|
+
"type": "page",
|
|
2452
|
+
"name": "Test",
|
|
2453
|
+
"corpus": {},
|
|
2454
|
+
"thumbnail_url": None,
|
|
2455
|
+
"zone": {},
|
|
2456
|
+
"best_classes": None,
|
|
2457
|
+
"has_children": None,
|
|
2458
|
+
"worker_version_id": None,
|
|
2459
|
+
"worker_run_id": None,
|
|
2460
|
+
}
|
|
2461
|
+
]
|
|
2462
|
+
responses.add(
|
|
2463
|
+
responses.GET,
|
|
2464
|
+
f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/?worker_version=False",
|
|
2465
|
+
status=200,
|
|
2466
|
+
json={
|
|
2467
|
+
"count": 1,
|
|
2468
|
+
"next": None,
|
|
2469
|
+
"results": expected_children,
|
|
2470
|
+
},
|
|
2471
|
+
)
|
|
2472
|
+
|
|
2473
|
+
with pytest.deprecated_call(
|
|
2474
|
+
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
|
|
2475
|
+
):
|
|
2476
|
+
for idx, child in enumerate(
|
|
2477
|
+
mock_elements_worker.list_elements(worker_version=False)
|
|
2478
|
+
):
|
|
2479
|
+
assert child == expected_children[idx]
|
|
2480
|
+
|
|
2481
|
+
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
2482
|
+
assert [
|
|
2483
|
+
(call.request.method, call.request.url) for call in responses.calls
|
|
2484
|
+
] == BASE_API_CALLS + [
|
|
2485
|
+
(
|
|
2486
|
+
"GET",
|
|
2487
|
+
f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/?worker_version=False",
|
|
2488
|
+
),
|
|
2489
|
+
]
|
|
2490
|
+
|
|
2491
|
+
|
|
2492
|
+
def test_list_elements_manual_worker_run(responses, mock_elements_worker):
|
|
2493
|
+
expected_children = [
|
|
2494
|
+
{
|
|
2495
|
+
"id": "0000",
|
|
2496
|
+
"type": "page",
|
|
2497
|
+
"name": "Test",
|
|
2498
|
+
"corpus": {},
|
|
2499
|
+
"thumbnail_url": None,
|
|
2500
|
+
"zone": {},
|
|
2501
|
+
"best_classes": None,
|
|
2502
|
+
"has_children": None,
|
|
2503
|
+
"worker_version_id": None,
|
|
2504
|
+
"worker_run_id": None,
|
|
2505
|
+
}
|
|
2506
|
+
]
|
|
2507
|
+
responses.add(
|
|
2508
|
+
responses.GET,
|
|
2509
|
+
f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/?worker_run=False",
|
|
2510
|
+
status=200,
|
|
2511
|
+
json={
|
|
2512
|
+
"count": 1,
|
|
2513
|
+
"next": None,
|
|
2514
|
+
"results": expected_children,
|
|
2515
|
+
},
|
|
2516
|
+
)
|
|
2517
|
+
|
|
2518
|
+
for idx, child in enumerate(mock_elements_worker.list_elements(worker_run=False)):
|
|
2519
|
+
assert child == expected_children[idx]
|
|
2520
|
+
|
|
2521
|
+
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
2522
|
+
assert [
|
|
2523
|
+
(call.request.method, call.request.url) for call in responses.calls
|
|
2524
|
+
] == BASE_API_CALLS + [
|
|
2525
|
+
(
|
|
2526
|
+
"GET",
|
|
2527
|
+
f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/?worker_run=False",
|
|
2528
|
+
),
|
|
2529
|
+
]
|
|
2530
|
+
|
|
2531
|
+
|
|
2532
|
+
def test_list_elements_with_cache_unhandled_param(mock_elements_worker_with_cache):
|
|
2533
|
+
with pytest.raises(
|
|
2534
|
+
AssertionError,
|
|
2535
|
+
match="When using the local cache, you can only filter by 'type' and/or 'worker_version' and/or 'worker_run'",
|
|
2536
|
+
):
|
|
2537
|
+
mock_elements_worker_with_cache.list_elements(with_corpus=True)
|
|
2538
|
+
|
|
2539
|
+
|
|
2540
|
+
@pytest.mark.usefixtures("_mock_cached_elements")
|
|
2541
|
+
@pytest.mark.parametrize(
|
|
2542
|
+
("filters", "expected_ids"),
|
|
2543
|
+
[
|
|
2544
|
+
# Filter on element should give all elements inserted
|
|
2545
|
+
(
|
|
2546
|
+
{},
|
|
2547
|
+
(
|
|
2548
|
+
"99999999-9999-9999-9999-999999999999",
|
|
2549
|
+
"12341234-1234-1234-1234-123412341234",
|
|
2550
|
+
"11111111-1111-1111-1111-111111111111",
|
|
2551
|
+
"22222222-2222-2222-2222-222222222222",
|
|
2552
|
+
"33333333-3333-3333-3333-333333333333",
|
|
2553
|
+
),
|
|
2554
|
+
),
|
|
2555
|
+
# Filter on element and page should give the second element
|
|
2556
|
+
(
|
|
2557
|
+
{"type": "page"},
|
|
2558
|
+
("22222222-2222-2222-2222-222222222222",),
|
|
2559
|
+
),
|
|
2560
|
+
# Filter on element and worker run should give second
|
|
2561
|
+
(
|
|
2562
|
+
{
|
|
2563
|
+
"worker_run": "56785678-5678-5678-5678-567856785678",
|
|
2564
|
+
},
|
|
2565
|
+
(
|
|
2566
|
+
"12341234-1234-1234-1234-123412341234",
|
|
2567
|
+
"22222222-2222-2222-2222-222222222222",
|
|
2568
|
+
),
|
|
2569
|
+
),
|
|
2570
|
+
# Filter on element, manual worker run should give first and third
|
|
2571
|
+
(
|
|
2572
|
+
{"worker_run": False},
|
|
2573
|
+
(
|
|
2574
|
+
"99999999-9999-9999-9999-999999999999",
|
|
2575
|
+
"11111111-1111-1111-1111-111111111111",
|
|
2576
|
+
"33333333-3333-3333-3333-333333333333",
|
|
2577
|
+
),
|
|
2578
|
+
),
|
|
2579
|
+
],
|
|
2580
|
+
)
|
|
2581
|
+
def test_list_elements_with_cache(
|
|
2582
|
+
responses, mock_elements_worker_with_cache, filters, expected_ids
|
|
2583
|
+
):
|
|
2584
|
+
# Check we have 5 elements already present in database
|
|
2585
|
+
assert CachedElement.select().count() == 5
|
|
2586
|
+
|
|
2587
|
+
# Query database through cache
|
|
2588
|
+
elements = mock_elements_worker_with_cache.list_elements(**filters)
|
|
2589
|
+
assert elements.count() == len(expected_ids)
|
|
2590
|
+
for child, expected_id in zip(elements.order_by("id"), expected_ids, strict=True):
|
|
2591
|
+
assert child.id == UUID(expected_id)
|
|
2592
|
+
|
|
2593
|
+
# Check the worker never hits the API for elements
|
|
2594
|
+
assert len(responses.calls) == len(BASE_API_CALLS)
|
|
2595
|
+
assert [
|
|
2596
|
+
(call.request.method, call.request.url) for call in responses.calls
|
|
2597
|
+
] == BASE_API_CALLS
|
|
2598
|
+
|
|
2599
|
+
|
|
2600
|
+
@pytest.mark.usefixtures("_mock_cached_elements")
|
|
2601
|
+
@pytest.mark.parametrize(
|
|
2602
|
+
("filters", "expected_ids"),
|
|
2603
|
+
[
|
|
2604
|
+
# Filter on element and worker version
|
|
2605
|
+
(
|
|
2606
|
+
{
|
|
2607
|
+
"worker_version": "56785678-5678-5678-5678-567856785678",
|
|
2608
|
+
},
|
|
2609
|
+
(
|
|
2610
|
+
"12341234-1234-1234-1234-123412341234",
|
|
2611
|
+
"11111111-1111-1111-1111-111111111111",
|
|
2612
|
+
"22222222-2222-2222-2222-222222222222",
|
|
2613
|
+
),
|
|
2614
|
+
),
|
|
2615
|
+
# Filter on element, type double_page and worker version
|
|
2616
|
+
(
|
|
2617
|
+
{"type": "page", "worker_version": "56785678-5678-5678-5678-567856785678"},
|
|
2618
|
+
("22222222-2222-2222-2222-222222222222",),
|
|
2619
|
+
),
|
|
2620
|
+
# Filter on element, manual worker version
|
|
2621
|
+
(
|
|
2622
|
+
{"worker_version": False},
|
|
2623
|
+
(
|
|
2624
|
+
"99999999-9999-9999-9999-999999999999",
|
|
2625
|
+
"33333333-3333-3333-3333-333333333333",
|
|
2626
|
+
),
|
|
2627
|
+
),
|
|
2628
|
+
],
|
|
2629
|
+
)
|
|
2630
|
+
def test_list_elements_with_cache_deprecation(
|
|
2631
|
+
responses,
|
|
2632
|
+
mock_elements_worker_with_cache,
|
|
2633
|
+
filters,
|
|
2634
|
+
expected_ids,
|
|
2635
|
+
):
|
|
2636
|
+
# Check we have 5 elements already present in database
|
|
2637
|
+
assert CachedElement.select().count() == 5
|
|
2638
|
+
|
|
2639
|
+
with pytest.deprecated_call(
|
|
2640
|
+
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
|
|
2641
|
+
):
|
|
2642
|
+
# Query database through cache
|
|
2643
|
+
elements = mock_elements_worker_with_cache.list_elements(**filters)
|
|
2644
|
+
assert elements.count() == len(expected_ids)
|
|
2645
|
+
for child, expected_id in zip(elements.order_by("id"), expected_ids, strict=True):
|
|
2646
|
+
assert child.id == UUID(expected_id)
|
|
2647
|
+
|
|
2648
|
+
# Check the worker never hits the API for elements
|
|
2649
|
+
assert len(responses.calls) == len(BASE_API_CALLS)
|
|
2650
|
+
assert [
|
|
2651
|
+
(call.request.method, call.request.url) for call in responses.calls
|
|
2652
|
+
] == BASE_API_CALLS
|
|
2653
|
+
|
|
2654
|
+
|
|
1961
2655
|
def test_list_element_children_wrong_element(mock_elements_worker):
|
|
1962
2656
|
with pytest.raises(
|
|
1963
2657
|
AssertionError,
|