arkindex-base-worker 0.3.5rc6__py3-none-any.whl → 0.3.6rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arkindex_base_worker-0.3.6rc2.dist-info/METADATA +39 -0
- arkindex_base_worker-0.3.6rc2.dist-info/RECORD +40 -0
- arkindex_worker/__init__.py +0 -1
- arkindex_worker/cache.py +19 -25
- arkindex_worker/image.py +16 -17
- arkindex_worker/models.py +24 -21
- arkindex_worker/utils.py +18 -19
- arkindex_worker/worker/__init__.py +17 -27
- arkindex_worker/worker/base.py +12 -7
- arkindex_worker/worker/classification.py +13 -15
- arkindex_worker/worker/dataset.py +3 -4
- arkindex_worker/worker/element.py +80 -76
- arkindex_worker/worker/entity.py +28 -30
- arkindex_worker/worker/metadata.py +21 -27
- arkindex_worker/worker/task.py +2 -3
- arkindex_worker/worker/training.py +25 -26
- arkindex_worker/worker/transcription.py +37 -34
- arkindex_worker/worker/version.py +1 -2
- tests/conftest.py +56 -76
- tests/test_base_worker.py +38 -32
- tests/test_cache.py +14 -7
- tests/test_dataset_worker.py +25 -22
- tests/test_element.py +0 -1
- tests/test_elements_worker/__init__.py +0 -1
- tests/test_elements_worker/test_classifications.py +0 -1
- tests/test_elements_worker/test_cli.py +22 -17
- tests/test_elements_worker/test_dataset.py +9 -10
- tests/test_elements_worker/test_elements.py +58 -63
- tests/test_elements_worker/test_entities.py +10 -20
- tests/test_elements_worker/test_metadata.py +72 -96
- tests/test_elements_worker/test_task.py +22 -20
- tests/test_elements_worker/test_training.py +20 -13
- tests/test_elements_worker/test_transcriptions.py +6 -10
- tests/test_elements_worker/test_worker.py +16 -14
- tests/test_image.py +21 -20
- tests/test_merge.py +5 -6
- tests/test_utils.py +0 -1
- arkindex_base_worker-0.3.5rc6.dist-info/METADATA +0 -27
- arkindex_base_worker-0.3.5rc6.dist-info/RECORD +0 -42
- arkindex_worker/git.py +0 -392
- tests/test_git.py +0 -480
- {arkindex_base_worker-0.3.5rc6.dist-info → arkindex_base_worker-0.3.6rc2.dist-info}/WHEEL +0 -0
- {arkindex_base_worker-0.3.5rc6.dist-info → arkindex_base_worker-0.3.6rc2.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
1
|
import json
|
|
3
2
|
import re
|
|
4
3
|
from uuid import UUID
|
|
@@ -56,7 +55,7 @@ def test_create_entity_wrong_type(mock_elements_worker):
|
|
|
56
55
|
)
|
|
57
56
|
|
|
58
57
|
|
|
59
|
-
def test_create_entity_wrong_corpus(
|
|
58
|
+
def test_create_entity_wrong_corpus(mock_elements_worker):
|
|
60
59
|
# Triggering an error on metas param, not giving corpus should work since
|
|
61
60
|
# ARKINDEX_CORPUS_ID environment variable is set on mock_elements_worker
|
|
62
61
|
with pytest.raises(AssertionError, match="metas should be of type dict"):
|
|
@@ -760,22 +759,13 @@ def test_list_corpus_entities(responses, mock_elements_worker):
|
|
|
760
759
|
]
|
|
761
760
|
|
|
762
761
|
|
|
763
|
-
@pytest.mark.parametrize(
|
|
764
|
-
"wrong_name",
|
|
765
|
-
[
|
|
766
|
-
1234,
|
|
767
|
-
12.5,
|
|
768
|
-
],
|
|
769
|
-
)
|
|
762
|
+
@pytest.mark.parametrize("wrong_name", [1234, 12.5])
|
|
770
763
|
def test_list_corpus_entities_wrong_name(mock_elements_worker, wrong_name):
|
|
771
764
|
with pytest.raises(AssertionError, match="name should be of type str"):
|
|
772
765
|
mock_elements_worker.list_corpus_entities(name=wrong_name)
|
|
773
766
|
|
|
774
767
|
|
|
775
|
-
@pytest.mark.parametrize(
|
|
776
|
-
"wrong_parent",
|
|
777
|
-
[{"id": "element_id"}, 12.5, "blabla"],
|
|
778
|
-
)
|
|
768
|
+
@pytest.mark.parametrize("wrong_parent", [{"id": "element_id"}, 12.5, "blabla"])
|
|
779
769
|
def test_list_corpus_entities_wrong_parent(mock_elements_worker, wrong_parent):
|
|
780
770
|
with pytest.raises(AssertionError, match="parent should be of type Element"):
|
|
781
771
|
mock_elements_worker.list_corpus_entities(parent=wrong_parent)
|
|
@@ -850,7 +840,7 @@ def test_check_required_entity_types_no_creation_allowed(
|
|
|
850
840
|
] == BASE_API_CALLS
|
|
851
841
|
|
|
852
842
|
|
|
853
|
-
@pytest.mark.parametrize("transcription",
|
|
843
|
+
@pytest.mark.parametrize("transcription", [None, "not a transcription", 1])
|
|
854
844
|
def test_create_transcription_entities_wrong_transcription(
|
|
855
845
|
mock_elements_worker, transcription
|
|
856
846
|
):
|
|
@@ -865,8 +855,8 @@ def test_create_transcription_entities_wrong_transcription(
|
|
|
865
855
|
|
|
866
856
|
|
|
867
857
|
@pytest.mark.parametrize(
|
|
868
|
-
"entities, error",
|
|
869
|
-
|
|
858
|
+
("entities", "error"),
|
|
859
|
+
[
|
|
870
860
|
(None, "entities shouldn't be null and should be of type list"),
|
|
871
861
|
(
|
|
872
862
|
"not a list of entities",
|
|
@@ -886,7 +876,7 @@ def test_create_transcription_entities_wrong_transcription(
|
|
|
886
876
|
* 2,
|
|
887
877
|
"entities should be unique",
|
|
888
878
|
),
|
|
889
|
-
|
|
879
|
+
],
|
|
890
880
|
)
|
|
891
881
|
def test_create_transcription_entities_wrong_entities(
|
|
892
882
|
mock_elements_worker, entities, error
|
|
@@ -909,8 +899,8 @@ def test_create_transcription_entities_wrong_entities_subtype(mock_elements_work
|
|
|
909
899
|
|
|
910
900
|
|
|
911
901
|
@pytest.mark.parametrize(
|
|
912
|
-
"entity, error",
|
|
913
|
-
|
|
902
|
+
("entity", "error"),
|
|
903
|
+
[
|
|
914
904
|
(
|
|
915
905
|
{
|
|
916
906
|
"name": None,
|
|
@@ -989,7 +979,7 @@ def test_create_transcription_entities_wrong_entities_subtype(mock_elements_work
|
|
|
989
979
|
},
|
|
990
980
|
"Entity at index 0 in entities: confidence should be None or a float in [0..1] range",
|
|
991
981
|
),
|
|
992
|
-
|
|
982
|
+
],
|
|
993
983
|
)
|
|
994
984
|
def test_create_transcription_entities_wrong_entity(
|
|
995
985
|
mock_elements_worker, entity, error
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
1
|
import json
|
|
3
2
|
import re
|
|
4
3
|
|
|
@@ -247,22 +246,20 @@ def test_create_metadata_cached_element(responses, mock_elements_worker_with_cac
|
|
|
247
246
|
|
|
248
247
|
|
|
249
248
|
@pytest.mark.parametrize(
|
|
250
|
-
"
|
|
249
|
+
"metadata_list",
|
|
251
250
|
[
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
]
|
|
262
|
-
),
|
|
251
|
+
[{"type": MetaType.Text, "name": "fake_name", "value": "fake_value"}],
|
|
252
|
+
[
|
|
253
|
+
{
|
|
254
|
+
"type": MetaType.Text,
|
|
255
|
+
"name": "fake_name",
|
|
256
|
+
"value": "fake_value",
|
|
257
|
+
"entity_id": "fake_entity_id",
|
|
258
|
+
}
|
|
259
|
+
],
|
|
263
260
|
],
|
|
264
261
|
)
|
|
265
|
-
def test_create_metadatas(responses, mock_elements_worker,
|
|
262
|
+
def test_create_metadatas(responses, mock_elements_worker, metadata_list):
|
|
266
263
|
element = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
267
264
|
responses.add(
|
|
268
265
|
responses.POST,
|
|
@@ -273,17 +270,19 @@ def test_create_metadatas(responses, mock_elements_worker, metadatas):
|
|
|
273
270
|
"metadata_list": [
|
|
274
271
|
{
|
|
275
272
|
"id": "fake_metadata_id",
|
|
276
|
-
"type":
|
|
277
|
-
"name":
|
|
278
|
-
"value":
|
|
273
|
+
"type": metadata_list[0]["type"].value,
|
|
274
|
+
"name": metadata_list[0]["name"],
|
|
275
|
+
"value": metadata_list[0]["value"],
|
|
279
276
|
"dates": [],
|
|
280
|
-
"entity_id":
|
|
277
|
+
"entity_id": metadata_list[0].get("entity_id"),
|
|
281
278
|
}
|
|
282
279
|
],
|
|
283
280
|
},
|
|
284
281
|
)
|
|
285
282
|
|
|
286
|
-
|
|
283
|
+
created_metadata_list = mock_elements_worker.create_metadatas(
|
|
284
|
+
element, metadata_list
|
|
285
|
+
)
|
|
287
286
|
|
|
288
287
|
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
289
288
|
assert [
|
|
@@ -296,42 +295,40 @@ def test_create_metadatas(responses, mock_elements_worker, metadatas):
|
|
|
296
295
|
]
|
|
297
296
|
assert json.loads(responses.calls[-1].request.body)["metadata_list"] == [
|
|
298
297
|
{
|
|
299
|
-
"type":
|
|
300
|
-
"name":
|
|
301
|
-
"value":
|
|
302
|
-
"entity_id":
|
|
298
|
+
"type": metadata_list[0]["type"].value,
|
|
299
|
+
"name": metadata_list[0]["name"],
|
|
300
|
+
"value": metadata_list[0]["value"],
|
|
301
|
+
"entity_id": metadata_list[0].get("entity_id"),
|
|
303
302
|
}
|
|
304
303
|
]
|
|
305
|
-
assert
|
|
304
|
+
assert created_metadata_list == [
|
|
306
305
|
{
|
|
307
306
|
"id": "fake_metadata_id",
|
|
308
|
-
"type":
|
|
309
|
-
"name":
|
|
310
|
-
"value":
|
|
307
|
+
"type": metadata_list[0]["type"].value,
|
|
308
|
+
"name": metadata_list[0]["name"],
|
|
309
|
+
"value": metadata_list[0]["value"],
|
|
311
310
|
"dates": [],
|
|
312
|
-
"entity_id":
|
|
311
|
+
"entity_id": metadata_list[0].get("entity_id"),
|
|
313
312
|
}
|
|
314
313
|
]
|
|
315
314
|
|
|
316
315
|
|
|
317
316
|
@pytest.mark.parametrize(
|
|
318
|
-
"
|
|
317
|
+
"metadata_list",
|
|
319
318
|
[
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
]
|
|
330
|
-
),
|
|
319
|
+
[{"type": MetaType.Text, "name": "fake_name", "value": "fake_value"}],
|
|
320
|
+
[
|
|
321
|
+
{
|
|
322
|
+
"type": MetaType.Text,
|
|
323
|
+
"name": "fake_name",
|
|
324
|
+
"value": "fake_value",
|
|
325
|
+
"entity_id": "fake_entity_id",
|
|
326
|
+
}
|
|
327
|
+
],
|
|
331
328
|
],
|
|
332
329
|
)
|
|
333
330
|
def test_create_metadatas_cached_element(
|
|
334
|
-
responses, mock_elements_worker_with_cache,
|
|
331
|
+
responses, mock_elements_worker_with_cache, metadata_list
|
|
335
332
|
):
|
|
336
333
|
element = CachedElement.create(
|
|
337
334
|
id="12341234-1234-1234-1234-123412341234", type="thing"
|
|
@@ -345,18 +342,18 @@ def test_create_metadatas_cached_element(
|
|
|
345
342
|
"metadata_list": [
|
|
346
343
|
{
|
|
347
344
|
"id": "fake_metadata_id",
|
|
348
|
-
"type":
|
|
349
|
-
"name":
|
|
350
|
-
"value":
|
|
345
|
+
"type": metadata_list[0]["type"].value,
|
|
346
|
+
"name": metadata_list[0]["name"],
|
|
347
|
+
"value": metadata_list[0]["value"],
|
|
351
348
|
"dates": [],
|
|
352
|
-
"entity_id":
|
|
349
|
+
"entity_id": metadata_list[0].get("entity_id"),
|
|
353
350
|
}
|
|
354
351
|
],
|
|
355
352
|
},
|
|
356
353
|
)
|
|
357
354
|
|
|
358
|
-
|
|
359
|
-
element,
|
|
355
|
+
created_metadata_list = mock_elements_worker_with_cache.create_metadatas(
|
|
356
|
+
element, metadata_list
|
|
360
357
|
)
|
|
361
358
|
|
|
362
359
|
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
@@ -370,35 +367,27 @@ def test_create_metadatas_cached_element(
|
|
|
370
367
|
]
|
|
371
368
|
assert json.loads(responses.calls[-1].request.body)["metadata_list"] == [
|
|
372
369
|
{
|
|
373
|
-
"type":
|
|
374
|
-
"name":
|
|
375
|
-
"value":
|
|
376
|
-
"entity_id":
|
|
370
|
+
"type": metadata_list[0]["type"].value,
|
|
371
|
+
"name": metadata_list[0]["name"],
|
|
372
|
+
"value": metadata_list[0]["value"],
|
|
373
|
+
"entity_id": metadata_list[0].get("entity_id"),
|
|
377
374
|
}
|
|
378
375
|
]
|
|
379
|
-
assert
|
|
376
|
+
assert created_metadata_list == [
|
|
380
377
|
{
|
|
381
378
|
"id": "fake_metadata_id",
|
|
382
|
-
"type":
|
|
383
|
-
"name":
|
|
384
|
-
"value":
|
|
379
|
+
"type": metadata_list[0]["type"].value,
|
|
380
|
+
"name": metadata_list[0]["name"],
|
|
381
|
+
"value": metadata_list[0]["value"],
|
|
385
382
|
"dates": [],
|
|
386
|
-
"entity_id":
|
|
383
|
+
"entity_id": metadata_list[0].get("entity_id"),
|
|
387
384
|
}
|
|
388
385
|
]
|
|
389
386
|
|
|
390
387
|
|
|
391
|
-
@pytest.mark.parametrize(
|
|
392
|
-
"wrong_element",
|
|
393
|
-
[
|
|
394
|
-
None,
|
|
395
|
-
"not_element_type",
|
|
396
|
-
1234,
|
|
397
|
-
12.5,
|
|
398
|
-
],
|
|
399
|
-
)
|
|
388
|
+
@pytest.mark.parametrize("wrong_element", [None, "not_element_type", 1234, 12.5])
|
|
400
389
|
def test_create_metadatas_wrong_element(mock_elements_worker, wrong_element):
|
|
401
|
-
|
|
390
|
+
wrong_metadata_list = [
|
|
402
391
|
{"type": MetaType.Text, "name": "fake_name", "value": "fake_value"}
|
|
403
392
|
]
|
|
404
393
|
with pytest.raises(
|
|
@@ -406,48 +395,42 @@ def test_create_metadatas_wrong_element(mock_elements_worker, wrong_element):
|
|
|
406
395
|
match="element shouldn't be null and should be of type Element or CachedElement",
|
|
407
396
|
):
|
|
408
397
|
mock_elements_worker.create_metadatas(
|
|
409
|
-
element=wrong_element, metadatas=
|
|
398
|
+
element=wrong_element, metadatas=wrong_metadata_list
|
|
410
399
|
)
|
|
411
400
|
|
|
412
401
|
|
|
413
|
-
@pytest.mark.parametrize(
|
|
414
|
-
"wrong_type",
|
|
415
|
-
[
|
|
416
|
-
None,
|
|
417
|
-
"not_metadata_type",
|
|
418
|
-
1234,
|
|
419
|
-
12.5,
|
|
420
|
-
],
|
|
421
|
-
)
|
|
402
|
+
@pytest.mark.parametrize("wrong_type", [None, "not_metadata_type", 1234, 12.5])
|
|
422
403
|
def test_create_metadatas_wrong_type(mock_elements_worker, wrong_type):
|
|
423
404
|
element = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
424
|
-
|
|
405
|
+
wrong_metadata_list = [
|
|
406
|
+
{"type": wrong_type, "name": "fake_name", "value": "fake_value"}
|
|
407
|
+
]
|
|
425
408
|
with pytest.raises(
|
|
426
409
|
AssertionError, match="type shouldn't be null and should be of type MetaType"
|
|
427
410
|
):
|
|
428
411
|
mock_elements_worker.create_metadatas(
|
|
429
|
-
element=element, metadatas=
|
|
412
|
+
element=element, metadatas=wrong_metadata_list
|
|
430
413
|
)
|
|
431
414
|
|
|
432
415
|
|
|
433
|
-
@pytest.mark.parametrize("wrong_name", [
|
|
416
|
+
@pytest.mark.parametrize("wrong_name", [None, 1234, 12.5, [1, 2, 3, 4]])
|
|
434
417
|
def test_create_metadatas_wrong_name(mock_elements_worker, wrong_name):
|
|
435
418
|
element = Element({"id": "fake_element_id"})
|
|
436
|
-
|
|
419
|
+
wrong_metadata_list = [
|
|
437
420
|
{"type": MetaType.Text, "name": wrong_name, "value": "fake_value"}
|
|
438
421
|
]
|
|
439
422
|
with pytest.raises(
|
|
440
423
|
AssertionError, match="name shouldn't be null and should be of type str"
|
|
441
424
|
):
|
|
442
425
|
mock_elements_worker.create_metadatas(
|
|
443
|
-
element=element, metadatas=
|
|
426
|
+
element=element, metadatas=wrong_metadata_list
|
|
444
427
|
)
|
|
445
428
|
|
|
446
429
|
|
|
447
|
-
@pytest.mark.parametrize("wrong_value", [
|
|
430
|
+
@pytest.mark.parametrize("wrong_value", [None, [1, 2, 3, 4]])
|
|
448
431
|
def test_create_metadatas_wrong_value(mock_elements_worker, wrong_value):
|
|
449
432
|
element = Element({"id": "fake_element_id"})
|
|
450
|
-
|
|
433
|
+
wrong_metadata_list = [
|
|
451
434
|
{"type": MetaType.Text, "name": "fake_name", "value": wrong_value}
|
|
452
435
|
]
|
|
453
436
|
with pytest.raises(
|
|
@@ -457,21 +440,14 @@ def test_create_metadatas_wrong_value(mock_elements_worker, wrong_value):
|
|
|
457
440
|
),
|
|
458
441
|
):
|
|
459
442
|
mock_elements_worker.create_metadatas(
|
|
460
|
-
element=element, metadatas=
|
|
443
|
+
element=element, metadatas=wrong_metadata_list
|
|
461
444
|
)
|
|
462
445
|
|
|
463
446
|
|
|
464
|
-
@pytest.mark.parametrize(
|
|
465
|
-
"wrong_entity",
|
|
466
|
-
[
|
|
467
|
-
[1, 2, 3, 4],
|
|
468
|
-
1234,
|
|
469
|
-
12.5,
|
|
470
|
-
],
|
|
471
|
-
)
|
|
447
|
+
@pytest.mark.parametrize("wrong_entity", [[1, 2, 3, 4], 1234, 12.5])
|
|
472
448
|
def test_create_metadatas_wrong_entity(mock_elements_worker, wrong_entity):
|
|
473
449
|
element = Element({"id": "fake_element_id"})
|
|
474
|
-
|
|
450
|
+
wrong_metadata_list = [
|
|
475
451
|
{
|
|
476
452
|
"type": MetaType.Text,
|
|
477
453
|
"name": "fake_name",
|
|
@@ -481,13 +457,13 @@ def test_create_metadatas_wrong_entity(mock_elements_worker, wrong_entity):
|
|
|
481
457
|
]
|
|
482
458
|
with pytest.raises(AssertionError, match="entity_id should be None or a str"):
|
|
483
459
|
mock_elements_worker.create_metadatas(
|
|
484
|
-
element=element, metadatas=
|
|
460
|
+
element=element, metadatas=wrong_metadata_list
|
|
485
461
|
)
|
|
486
462
|
|
|
487
463
|
|
|
488
464
|
def test_create_metadatas_api_error(responses, mock_elements_worker):
|
|
489
465
|
element = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
490
|
-
|
|
466
|
+
metadata_list = [
|
|
491
467
|
{
|
|
492
468
|
"type": MetaType.Text,
|
|
493
469
|
"name": "fake_name",
|
|
@@ -502,7 +478,7 @@ def test_create_metadatas_api_error(responses, mock_elements_worker):
|
|
|
502
478
|
)
|
|
503
479
|
|
|
504
480
|
with pytest.raises(ErrorResponse):
|
|
505
|
-
mock_elements_worker.create_metadatas(element,
|
|
481
|
+
mock_elements_worker.create_metadatas(element, metadata_list)
|
|
506
482
|
|
|
507
483
|
assert len(responses.calls) == len(BASE_API_CALLS) + 5
|
|
508
484
|
assert [
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
1
|
import uuid
|
|
3
2
|
|
|
4
3
|
import pytest
|
|
@@ -12,8 +11,8 @@ TASK_ID = uuid.UUID("cafecafe-cafe-cafe-cafe-cafecafecafe")
|
|
|
12
11
|
|
|
13
12
|
|
|
14
13
|
@pytest.mark.parametrize(
|
|
15
|
-
"payload, error",
|
|
16
|
-
|
|
14
|
+
("payload", "error"),
|
|
15
|
+
[
|
|
17
16
|
# Task ID
|
|
18
17
|
(
|
|
19
18
|
{"task_id": None},
|
|
@@ -23,7 +22,7 @@ TASK_ID = uuid.UUID("cafecafe-cafe-cafe-cafe-cafecafecafe")
|
|
|
23
22
|
{"task_id": "12341234-1234-1234-1234-123412341234"},
|
|
24
23
|
"task_id shouldn't be null and should be an UUID",
|
|
25
24
|
),
|
|
26
|
-
|
|
25
|
+
],
|
|
27
26
|
)
|
|
28
27
|
def test_list_artifacts_wrong_param_task_id(mock_dataset_worker, payload, error):
|
|
29
28
|
with pytest.raises(AssertionError, match=error):
|
|
@@ -60,7 +59,7 @@ def test_list_artifacts(
|
|
|
60
59
|
expected_results = [
|
|
61
60
|
{
|
|
62
61
|
"id": "artifact_1",
|
|
63
|
-
"path": "dataset_id.
|
|
62
|
+
"path": "dataset_id.tar.zst",
|
|
64
63
|
"size": 42,
|
|
65
64
|
"content_type": "application/zstd",
|
|
66
65
|
"s3_put_url": None,
|
|
@@ -97,8 +96,8 @@ def test_list_artifacts(
|
|
|
97
96
|
|
|
98
97
|
|
|
99
98
|
@pytest.mark.parametrize(
|
|
100
|
-
"payload, error",
|
|
101
|
-
|
|
99
|
+
("payload", "error"),
|
|
100
|
+
[
|
|
102
101
|
# Task ID
|
|
103
102
|
(
|
|
104
103
|
{"task_id": None},
|
|
@@ -108,7 +107,7 @@ def test_list_artifacts(
|
|
|
108
107
|
{"task_id": "12341234-1234-1234-1234-123412341234"},
|
|
109
108
|
"task_id shouldn't be null and should be an UUID",
|
|
110
109
|
),
|
|
111
|
-
|
|
110
|
+
],
|
|
112
111
|
)
|
|
113
112
|
def test_download_artifact_wrong_param_task_id(
|
|
114
113
|
mock_dataset_worker, default_artifact, payload, error
|
|
@@ -124,8 +123,8 @@ def test_download_artifact_wrong_param_task_id(
|
|
|
124
123
|
|
|
125
124
|
|
|
126
125
|
@pytest.mark.parametrize(
|
|
127
|
-
"payload, error",
|
|
128
|
-
|
|
126
|
+
("payload", "error"),
|
|
127
|
+
[
|
|
129
128
|
# Artifact
|
|
130
129
|
(
|
|
131
130
|
{"artifact": None},
|
|
@@ -135,7 +134,7 @@ def test_download_artifact_wrong_param_task_id(
|
|
|
135
134
|
{"artifact": "not artifact type"},
|
|
136
135
|
"artifact shouldn't be null and should be an Artifact",
|
|
137
136
|
),
|
|
138
|
-
|
|
137
|
+
],
|
|
139
138
|
)
|
|
140
139
|
def test_download_artifact_wrong_param_artifact(
|
|
141
140
|
mock_dataset_worker, default_artifact, payload, error
|
|
@@ -153,7 +152,7 @@ def test_download_artifact_wrong_param_artifact(
|
|
|
153
152
|
def test_download_artifact_api_error(responses, mock_dataset_worker, default_artifact):
|
|
154
153
|
responses.add(
|
|
155
154
|
responses.GET,
|
|
156
|
-
f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.
|
|
155
|
+
f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst",
|
|
157
156
|
status=500,
|
|
158
157
|
)
|
|
159
158
|
|
|
@@ -167,11 +166,11 @@ def test_download_artifact_api_error(responses, mock_dataset_worker, default_art
|
|
|
167
166
|
(call.request.method, call.request.url) for call in responses.calls
|
|
168
167
|
] == BASE_API_CALLS + [
|
|
169
168
|
# The API call is retried 5 times
|
|
170
|
-
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.
|
|
171
|
-
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.
|
|
172
|
-
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.
|
|
173
|
-
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.
|
|
174
|
-
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.
|
|
169
|
+
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst"),
|
|
170
|
+
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst"),
|
|
171
|
+
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst"),
|
|
172
|
+
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst"),
|
|
173
|
+
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst"),
|
|
175
174
|
]
|
|
176
175
|
|
|
177
176
|
|
|
@@ -181,11 +180,14 @@ def test_download_artifact(
|
|
|
181
180
|
default_artifact,
|
|
182
181
|
):
|
|
183
182
|
archive_path = (
|
|
184
|
-
FIXTURES_DIR
|
|
183
|
+
FIXTURES_DIR
|
|
184
|
+
/ "extract_parent_archives"
|
|
185
|
+
/ "first_parent"
|
|
186
|
+
/ "arkindex_data.tar.zst"
|
|
185
187
|
)
|
|
186
188
|
responses.add(
|
|
187
189
|
responses.GET,
|
|
188
|
-
f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.
|
|
190
|
+
f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst",
|
|
189
191
|
status=200,
|
|
190
192
|
body=archive_path.read_bytes(),
|
|
191
193
|
content_type="application/zstd",
|
|
@@ -202,5 +204,5 @@ def test_download_artifact(
|
|
|
202
204
|
assert [
|
|
203
205
|
(call.request.method, call.request.url) for call in responses.calls
|
|
204
206
|
] == BASE_API_CALLS + [
|
|
205
|
-
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.
|
|
207
|
+
("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst"),
|
|
206
208
|
]
|
|
@@ -1,16 +1,14 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
1
|
import logging
|
|
3
2
|
import sys
|
|
4
3
|
|
|
5
4
|
import pytest
|
|
6
|
-
import responses
|
|
7
5
|
|
|
8
6
|
from arkindex.mock import MockApiClient
|
|
9
7
|
from arkindex_worker.worker import BaseWorker
|
|
10
8
|
from arkindex_worker.worker.training import TrainingMixin, create_archive
|
|
11
9
|
|
|
12
10
|
|
|
13
|
-
@pytest.fixture
|
|
11
|
+
@pytest.fixture()
|
|
14
12
|
def mock_training_worker(monkeypatch):
|
|
15
13
|
class TrainingWorker(BaseWorker, TrainingMixin):
|
|
16
14
|
"""
|
|
@@ -24,7 +22,7 @@ def mock_training_worker(monkeypatch):
|
|
|
24
22
|
return training_worker
|
|
25
23
|
|
|
26
24
|
|
|
27
|
-
@pytest.fixture
|
|
25
|
+
@pytest.fixture()
|
|
28
26
|
def default_model_version():
|
|
29
27
|
return {
|
|
30
28
|
"id": "model_version_id",
|
|
@@ -79,23 +77,32 @@ def test_create_archive_with_subfolder(model_file_dir_with_subfolder):
|
|
|
79
77
|
assert not zst_archive_path.exists(), "Auto removal failed"
|
|
80
78
|
|
|
81
79
|
|
|
82
|
-
def test_handle_s3_uploading_errors(mock_training_worker, model_file_dir):
|
|
80
|
+
def test_handle_s3_uploading_errors(responses, mock_training_worker, model_file_dir):
|
|
83
81
|
s3_endpoint_url = "http://s3.localhost.com"
|
|
84
82
|
responses.add_passthru(s3_endpoint_url)
|
|
85
|
-
responses.add(responses.
|
|
83
|
+
responses.add(responses.PUT, s3_endpoint_url, status=400)
|
|
84
|
+
|
|
85
|
+
mock_training_worker.model_version = {
|
|
86
|
+
"state": "Created",
|
|
87
|
+
"s3_put_url": s3_endpoint_url,
|
|
88
|
+
}
|
|
89
|
+
|
|
86
90
|
file_path = model_file_dir / "model_file.pth"
|
|
87
|
-
with pytest.raises(
|
|
88
|
-
|
|
91
|
+
with pytest.raises(
|
|
92
|
+
Exception,
|
|
93
|
+
match="400 Client Error: Bad Request for url: http://s3.localhost.com/",
|
|
94
|
+
):
|
|
95
|
+
mock_training_worker.upload_to_s3(file_path)
|
|
89
96
|
|
|
90
97
|
|
|
91
98
|
@pytest.mark.parametrize(
|
|
92
99
|
"method",
|
|
93
100
|
[
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
101
|
+
"publish_model_version",
|
|
102
|
+
"create_model_version",
|
|
103
|
+
"update_model_version",
|
|
104
|
+
"upload_to_s3",
|
|
105
|
+
"validate_model_version",
|
|
99
106
|
],
|
|
100
107
|
)
|
|
101
108
|
def test_training_mixin_read_only(mock_training_worker, method, caplog):
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
1
|
import json
|
|
3
2
|
import re
|
|
4
3
|
from uuid import UUID
|
|
@@ -1867,9 +1866,10 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
|
|
|
1867
1866
|
]
|
|
1868
1867
|
|
|
1869
1868
|
|
|
1869
|
+
@pytest.mark.usefixtures("_mock_cached_transcriptions")
|
|
1870
1870
|
@pytest.mark.parametrize(
|
|
1871
|
-
"filters, expected_ids",
|
|
1872
|
-
|
|
1871
|
+
("filters", "expected_ids"),
|
|
1872
|
+
[
|
|
1873
1873
|
# Filter on element should give first and sixth transcription
|
|
1874
1874
|
(
|
|
1875
1875
|
{
|
|
@@ -1963,14 +1963,10 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
|
|
|
1963
1963
|
},
|
|
1964
1964
|
("66666666-6666-6666-6666-666666666666",),
|
|
1965
1965
|
),
|
|
1966
|
-
|
|
1966
|
+
],
|
|
1967
1967
|
)
|
|
1968
1968
|
def test_list_transcriptions_with_cache(
|
|
1969
|
-
responses,
|
|
1970
|
-
mock_elements_worker_with_cache,
|
|
1971
|
-
mock_cached_transcriptions,
|
|
1972
|
-
filters,
|
|
1973
|
-
expected_ids,
|
|
1969
|
+
responses, mock_elements_worker_with_cache, filters, expected_ids
|
|
1974
1970
|
):
|
|
1975
1971
|
# Check we have 5 elements already present in database
|
|
1976
1972
|
assert CachedTranscription.select().count() == 6
|
|
@@ -1979,7 +1975,7 @@ def test_list_transcriptions_with_cache(
|
|
|
1979
1975
|
transcriptions = mock_elements_worker_with_cache.list_transcriptions(**filters)
|
|
1980
1976
|
assert transcriptions.count() == len(expected_ids)
|
|
1981
1977
|
for transcription, expected_id in zip(
|
|
1982
|
-
transcriptions.order_by(CachedTranscription.id), expected_ids
|
|
1978
|
+
transcriptions.order_by(CachedTranscription.id), expected_ids, strict=True
|
|
1983
1979
|
):
|
|
1984
1980
|
assert transcription.id == UUID(expected_id)
|
|
1985
1981
|
|