arkindex-base-worker 0.4.0rc6__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.4.0rc6.dist-info → arkindex_base_worker-0.5.0.dist-info}/METADATA +9 -12
- arkindex_base_worker-0.5.0.dist-info/RECORD +60 -0
- {arkindex_base_worker-0.4.0rc6.dist-info → arkindex_base_worker-0.5.0.dist-info}/WHEEL +1 -1
- {arkindex_base_worker-0.4.0rc6.dist-info → arkindex_base_worker-0.5.0.dist-info}/top_level.txt +1 -0
- arkindex_worker/__init__.py +3 -0
- arkindex_worker/cache.py +6 -25
- arkindex_worker/image.py +105 -66
- arkindex_worker/utils.py +2 -1
- arkindex_worker/worker/__init__.py +17 -31
- arkindex_worker/worker/base.py +16 -9
- arkindex_worker/worker/classification.py +36 -34
- arkindex_worker/worker/corpus.py +3 -3
- arkindex_worker/worker/dataset.py +9 -9
- arkindex_worker/worker/element.py +261 -231
- arkindex_worker/worker/entity.py +137 -206
- arkindex_worker/worker/image.py +3 -3
- arkindex_worker/worker/metadata.py +27 -38
- arkindex_worker/worker/task.py +9 -9
- arkindex_worker/worker/training.py +15 -11
- arkindex_worker/worker/transcription.py +77 -71
- examples/standalone/python/worker.py +171 -0
- examples/tooled/python/worker.py +50 -0
- tests/conftest.py +22 -36
- tests/test_base_worker.py +1 -1
- tests/test_cache.py +1 -2
- tests/test_dataset_worker.py +1 -1
- tests/test_elements_worker/test_element.py +200 -26
- tests/test_elements_worker/{test_entity_create.py → test_entity.py} +220 -227
- tests/test_elements_worker/test_metadata.py +0 -47
- tests/test_elements_worker/test_training.py +8 -8
- tests/test_elements_worker/test_worker.py +15 -14
- tests/test_image.py +244 -126
- tests/test_merge.py +0 -7
- tests/test_utils.py +37 -0
- arkindex_base_worker-0.4.0rc6.dist-info/RECORD +0 -61
- arkindex_worker/worker/version.py +0 -58
- tests/test_elements_worker/test_entity_list_and_check.py +0 -160
- tests/test_elements_worker/test_version.py +0 -60
- {arkindex_base_worker-0.4.0rc6.dist-info → arkindex_base_worker-0.5.0.dist-info/licenses}/LICENSE +0 -0
|
@@ -119,19 +119,6 @@ def test_create_metadata_wrong_value(mock_elements_worker):
|
|
|
119
119
|
)
|
|
120
120
|
|
|
121
121
|
|
|
122
|
-
def test_create_metadata_wrong_entity(mock_elements_worker):
|
|
123
|
-
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
124
|
-
|
|
125
|
-
with pytest.raises(AssertionError, match="entity should be of type str"):
|
|
126
|
-
mock_elements_worker.create_metadata(
|
|
127
|
-
element=elt,
|
|
128
|
-
type=MetaType.Location,
|
|
129
|
-
name="Teklia",
|
|
130
|
-
value="La Turbine, Grenoble 38000",
|
|
131
|
-
entity=1234,
|
|
132
|
-
)
|
|
133
|
-
|
|
134
|
-
|
|
135
122
|
def test_create_metadata_api_error(responses, mock_elements_worker):
|
|
136
123
|
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
137
124
|
responses.add(
|
|
@@ -188,7 +175,6 @@ def test_create_metadata(responses, mock_elements_worker):
|
|
|
188
175
|
"type": "location",
|
|
189
176
|
"name": "Teklia",
|
|
190
177
|
"value": "La Turbine, Grenoble 38000",
|
|
191
|
-
"entity_id": None,
|
|
192
178
|
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
193
179
|
}
|
|
194
180
|
assert metadata_id == "12345678-1234-1234-1234-123456789123"
|
|
@@ -223,7 +209,6 @@ def test_create_metadata_cached_element(responses, mock_elements_worker_with_cac
|
|
|
223
209
|
"type": "location",
|
|
224
210
|
"name": "Teklia",
|
|
225
211
|
"value": "La Turbine, Grenoble 38000",
|
|
226
|
-
"entity_id": None,
|
|
227
212
|
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
228
213
|
}
|
|
229
214
|
assert metadata_id == "12345678-1234-1234-1234-123456789123"
|
|
@@ -239,7 +224,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
|
|
|
239
224
|
"type": MetaType.Text,
|
|
240
225
|
"name": "Year",
|
|
241
226
|
"value": "2024",
|
|
242
|
-
"entity_id": "entity_id",
|
|
243
227
|
},
|
|
244
228
|
]
|
|
245
229
|
if batch_size > 1:
|
|
@@ -256,7 +240,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
|
|
|
256
240
|
"name": metadata_list[0]["name"],
|
|
257
241
|
"value": metadata_list[0]["value"],
|
|
258
242
|
"dates": [],
|
|
259
|
-
"entity_id": None,
|
|
260
243
|
},
|
|
261
244
|
{
|
|
262
245
|
"id": "fake_metadata_id2",
|
|
@@ -264,7 +247,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
|
|
|
264
247
|
"name": metadata_list[1]["name"],
|
|
265
248
|
"value": metadata_list[1]["value"],
|
|
266
249
|
"dates": [],
|
|
267
|
-
"entity_id": metadata_list[1]["entity_id"],
|
|
268
250
|
},
|
|
269
251
|
],
|
|
270
252
|
},
|
|
@@ -284,7 +266,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
|
|
|
284
266
|
"name": meta["name"],
|
|
285
267
|
"value": meta["value"],
|
|
286
268
|
"dates": [],
|
|
287
|
-
"entity_id": meta.get("entity_id"),
|
|
288
269
|
}
|
|
289
270
|
],
|
|
290
271
|
},
|
|
@@ -316,7 +297,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
|
|
|
316
297
|
first_meta = {
|
|
317
298
|
**metadata_list[0],
|
|
318
299
|
"type": metadata_list[0]["type"].value,
|
|
319
|
-
"entity_id": None,
|
|
320
300
|
}
|
|
321
301
|
second_meta = {**metadata_list[1], "type": metadata_list[1]["type"].value}
|
|
322
302
|
empty_payload = {
|
|
@@ -346,7 +326,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
|
|
|
346
326
|
"name": metadata_list[0]["name"],
|
|
347
327
|
"value": metadata_list[0]["value"],
|
|
348
328
|
"dates": [],
|
|
349
|
-
"entity_id": None,
|
|
350
329
|
},
|
|
351
330
|
{
|
|
352
331
|
"id": "fake_metadata_id2",
|
|
@@ -354,7 +333,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
|
|
|
354
333
|
"name": metadata_list[1]["name"],
|
|
355
334
|
"value": metadata_list[1]["value"],
|
|
356
335
|
"dates": [],
|
|
357
|
-
"entity_id": metadata_list[1]["entity_id"],
|
|
358
336
|
},
|
|
359
337
|
]
|
|
360
338
|
|
|
@@ -373,7 +351,6 @@ def test_create_metadata_bulk_cached_element(
|
|
|
373
351
|
"type": MetaType.Text,
|
|
374
352
|
"name": "Year",
|
|
375
353
|
"value": "2024",
|
|
376
|
-
"entity_id": "entity_id",
|
|
377
354
|
},
|
|
378
355
|
]
|
|
379
356
|
if batch_size > 1:
|
|
@@ -390,7 +367,6 @@ def test_create_metadata_bulk_cached_element(
|
|
|
390
367
|
"name": metadata_list[0]["name"],
|
|
391
368
|
"value": metadata_list[0]["value"],
|
|
392
369
|
"dates": [],
|
|
393
|
-
"entity_id": None,
|
|
394
370
|
},
|
|
395
371
|
{
|
|
396
372
|
"id": "fake_metadata_id2",
|
|
@@ -398,7 +374,6 @@ def test_create_metadata_bulk_cached_element(
|
|
|
398
374
|
"name": metadata_list[1]["name"],
|
|
399
375
|
"value": metadata_list[1]["value"],
|
|
400
376
|
"dates": [],
|
|
401
|
-
"entity_id": metadata_list[1]["entity_id"],
|
|
402
377
|
},
|
|
403
378
|
],
|
|
404
379
|
},
|
|
@@ -418,7 +393,6 @@ def test_create_metadata_bulk_cached_element(
|
|
|
418
393
|
"name": meta["name"],
|
|
419
394
|
"value": meta["value"],
|
|
420
395
|
"dates": [],
|
|
421
|
-
"entity_id": meta.get("entity_id"),
|
|
422
396
|
}
|
|
423
397
|
],
|
|
424
398
|
},
|
|
@@ -450,7 +424,6 @@ def test_create_metadata_bulk_cached_element(
|
|
|
450
424
|
first_meta = {
|
|
451
425
|
**metadata_list[0],
|
|
452
426
|
"type": metadata_list[0]["type"].value,
|
|
453
|
-
"entity_id": None,
|
|
454
427
|
}
|
|
455
428
|
second_meta = {**metadata_list[1], "type": metadata_list[1]["type"].value}
|
|
456
429
|
empty_payload = {
|
|
@@ -480,7 +453,6 @@ def test_create_metadata_bulk_cached_element(
|
|
|
480
453
|
"name": metadata_list[0]["name"],
|
|
481
454
|
"value": metadata_list[0]["value"],
|
|
482
455
|
"dates": [],
|
|
483
|
-
"entity_id": None,
|
|
484
456
|
},
|
|
485
457
|
{
|
|
486
458
|
"id": "fake_metadata_id2",
|
|
@@ -488,7 +460,6 @@ def test_create_metadata_bulk_cached_element(
|
|
|
488
460
|
"name": metadata_list[1]["name"],
|
|
489
461
|
"value": metadata_list[1]["value"],
|
|
490
462
|
"dates": [],
|
|
491
|
-
"entity_id": metadata_list[1]["entity_id"],
|
|
492
463
|
},
|
|
493
464
|
]
|
|
494
465
|
|
|
@@ -552,23 +523,6 @@ def test_create_metadata_bulk_wrong_value(mock_elements_worker, wrong_value):
|
|
|
552
523
|
)
|
|
553
524
|
|
|
554
525
|
|
|
555
|
-
@pytest.mark.parametrize("wrong_entity", [[1, 2, 3, 4], 1234, 12.5])
|
|
556
|
-
def test_create_metadata_bulk_wrong_entity(mock_elements_worker, wrong_entity):
|
|
557
|
-
element = Element({"id": "fake_element_id"})
|
|
558
|
-
wrong_metadata_list = [
|
|
559
|
-
{
|
|
560
|
-
"type": MetaType.Text,
|
|
561
|
-
"name": "fake_name",
|
|
562
|
-
"value": "fake_value",
|
|
563
|
-
"entity_id": wrong_entity,
|
|
564
|
-
}
|
|
565
|
-
]
|
|
566
|
-
with pytest.raises(AssertionError, match="entity_id should be None or a str"):
|
|
567
|
-
mock_elements_worker.create_metadata_bulk(
|
|
568
|
-
element=element, metadata_list=wrong_metadata_list
|
|
569
|
-
)
|
|
570
|
-
|
|
571
|
-
|
|
572
526
|
def test_create_metadata_bulk_api_error(responses, mock_elements_worker):
|
|
573
527
|
element = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
574
528
|
metadata_list = [
|
|
@@ -576,7 +530,6 @@ def test_create_metadata_bulk_api_error(responses, mock_elements_worker):
|
|
|
576
530
|
"type": MetaType.Text,
|
|
577
531
|
"name": "fake_name",
|
|
578
532
|
"value": "fake_value",
|
|
579
|
-
"entity_id": "fake_entity_id",
|
|
580
533
|
}
|
|
581
534
|
]
|
|
582
535
|
responses.add(
|
|
@@ -8,7 +8,7 @@ from arkindex_worker.worker import BaseWorker
|
|
|
8
8
|
from arkindex_worker.worker.training import TrainingMixin, create_archive
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
@pytest.fixture
|
|
11
|
+
@pytest.fixture
|
|
12
12
|
def mock_training_worker(monkeypatch):
|
|
13
13
|
class TrainingWorker(BaseWorker, TrainingMixin):
|
|
14
14
|
"""
|
|
@@ -22,7 +22,7 @@ def mock_training_worker(monkeypatch):
|
|
|
22
22
|
return training_worker
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
@pytest.fixture
|
|
25
|
+
@pytest.fixture
|
|
26
26
|
def default_model_version():
|
|
27
27
|
return {
|
|
28
28
|
"id": "model_version_id",
|
|
@@ -51,9 +51,9 @@ def test_create_archive(model_file_dir):
|
|
|
51
51
|
archive_hash,
|
|
52
52
|
):
|
|
53
53
|
assert zst_archive_path.exists(), "The archive was not created"
|
|
54
|
-
assert (
|
|
55
|
-
|
|
56
|
-
)
|
|
54
|
+
assert hash == "c5aedde18a768757351068b840c8c8f9", (
|
|
55
|
+
"Hash was not properly computed"
|
|
56
|
+
)
|
|
57
57
|
assert 300 < size < 700
|
|
58
58
|
|
|
59
59
|
assert not zst_archive_path.exists(), "Auto removal failed"
|
|
@@ -69,9 +69,9 @@ def test_create_archive_with_subfolder(model_file_dir_with_subfolder):
|
|
|
69
69
|
archive_hash,
|
|
70
70
|
):
|
|
71
71
|
assert zst_archive_path.exists(), "The archive was not created"
|
|
72
|
-
assert (
|
|
73
|
-
|
|
74
|
-
)
|
|
72
|
+
assert hash == "3e453881404689e6e125144d2db3e605", (
|
|
73
|
+
"Hash was not properly computed"
|
|
74
|
+
)
|
|
75
75
|
assert 300 < size < 1500
|
|
76
76
|
|
|
77
77
|
assert not zst_archive_path.exists(), "Auto removal failed"
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import logging
|
|
2
3
|
import sys
|
|
3
4
|
from argparse import Namespace
|
|
4
5
|
from uuid import UUID
|
|
@@ -685,7 +686,7 @@ def test_run_cache(monkeypatch, mocker, mock_elements_worker_with_cache):
|
|
|
685
686
|
|
|
686
687
|
|
|
687
688
|
def test_start_activity_conflict(
|
|
688
|
-
monkeypatch, responses,
|
|
689
|
+
monkeypatch, responses, mock_elements_worker_with_list, caplog
|
|
689
690
|
):
|
|
690
691
|
# Disable second configure call from run()
|
|
691
692
|
monkeypatch.setattr(mock_elements_worker_with_list, "configure", lambda: None)
|
|
@@ -700,9 +701,6 @@ def test_start_activity_conflict(
|
|
|
700
701
|
content="Either this activity does not exists or this state is not allowed.",
|
|
701
702
|
),
|
|
702
703
|
)
|
|
703
|
-
from arkindex_worker.worker import logger
|
|
704
|
-
|
|
705
|
-
logger.info = mocker.MagicMock()
|
|
706
704
|
|
|
707
705
|
mock_elements_worker_with_list.run()
|
|
708
706
|
|
|
@@ -716,14 +714,15 @@ def test_start_activity_conflict(
|
|
|
716
714
|
"http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
|
|
717
715
|
),
|
|
718
716
|
]
|
|
719
|
-
assert
|
|
720
|
-
|
|
721
|
-
|
|
717
|
+
assert [(record.levelno, record.message) for record in caplog.records] == [
|
|
718
|
+
(logging.INFO, "Processing page Test Page n°1 (1234-deadbeef) (1/1)"),
|
|
719
|
+
(logging.INFO, "Skipping element 1234-deadbeef as it was already processed"),
|
|
720
|
+
(logging.INFO, "Ran on 1 element: 1 completed, 0 failed"),
|
|
722
721
|
]
|
|
723
722
|
|
|
724
723
|
|
|
725
724
|
def test_start_activity_error(
|
|
726
|
-
monkeypatch, responses,
|
|
725
|
+
monkeypatch, responses, mock_elements_worker_with_list, caplog
|
|
727
726
|
):
|
|
728
727
|
# Disable second configure call from run()
|
|
729
728
|
monkeypatch.setattr(mock_elements_worker_with_list, "configure", lambda: None)
|
|
@@ -732,11 +731,8 @@ def test_start_activity_error(
|
|
|
732
731
|
responses.add(
|
|
733
732
|
responses.PUT,
|
|
734
733
|
"http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
|
|
735
|
-
body=Exception("A wild Petilil appears
|
|
734
|
+
body=Exception("A wild Petilil appears!"),
|
|
736
735
|
)
|
|
737
|
-
from arkindex_worker.worker import logger
|
|
738
|
-
|
|
739
|
-
logger.error = mocker.MagicMock()
|
|
740
736
|
|
|
741
737
|
with pytest.raises(SystemExit):
|
|
742
738
|
mock_elements_worker_with_list.run()
|
|
@@ -755,8 +751,13 @@ def test_start_activity_error(
|
|
|
755
751
|
"http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
|
|
756
752
|
),
|
|
757
753
|
]
|
|
758
|
-
assert
|
|
759
|
-
|
|
754
|
+
assert [(record.levelno, record.message) for record in caplog.records] == [
|
|
755
|
+
(logging.INFO, "Processing page Test Page n°1 (1234-deadbeef) (1/1)"),
|
|
756
|
+
(
|
|
757
|
+
logging.WARNING,
|
|
758
|
+
"Failed running worker on element 1234-deadbeef: Exception('A wild Petilil appears!')",
|
|
759
|
+
),
|
|
760
|
+
(logging.ERROR, "Ran on 1 element: 0 completed, 1 failed"),
|
|
760
761
|
]
|
|
761
762
|
|
|
762
763
|
|