arkindex-base-worker 0.4.0__py3-none-any.whl → 0.4.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/METADATA +13 -15
  2. arkindex_base_worker-0.4.0a1.dist-info/RECORD +51 -0
  3. {arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/WHEEL +1 -1
  4. arkindex_worker/cache.py +1 -1
  5. arkindex_worker/image.py +1 -120
  6. arkindex_worker/utils.py +0 -82
  7. arkindex_worker/worker/__init__.py +161 -46
  8. arkindex_worker/worker/base.py +11 -36
  9. arkindex_worker/worker/classification.py +18 -34
  10. arkindex_worker/worker/corpus.py +4 -21
  11. arkindex_worker/worker/dataset.py +1 -71
  12. arkindex_worker/worker/element.py +91 -352
  13. arkindex_worker/worker/entity.py +11 -11
  14. arkindex_worker/worker/metadata.py +9 -19
  15. arkindex_worker/worker/task.py +4 -5
  16. arkindex_worker/worker/training.py +18 -21
  17. arkindex_worker/worker/transcription.py +68 -89
  18. arkindex_worker/worker/version.py +1 -3
  19. tests/__init__.py +1 -1
  20. tests/conftest.py +45 -33
  21. tests/test_base_worker.py +3 -204
  22. tests/test_dataset_worker.py +4 -7
  23. tests/test_elements_worker/{test_classification.py → test_classifications.py} +61 -194
  24. tests/test_elements_worker/test_corpus.py +1 -32
  25. tests/test_elements_worker/test_dataset.py +1 -1
  26. tests/test_elements_worker/test_elements.py +2734 -0
  27. tests/test_elements_worker/{test_entity_create.py → test_entities.py} +160 -26
  28. tests/test_elements_worker/test_image.py +1 -2
  29. tests/test_elements_worker/test_metadata.py +99 -224
  30. tests/test_elements_worker/test_task.py +1 -1
  31. tests/test_elements_worker/test_training.py +43 -17
  32. tests/test_elements_worker/test_transcriptions.py +2102 -0
  33. tests/test_elements_worker/test_worker.py +280 -563
  34. tests/test_image.py +204 -429
  35. tests/test_merge.py +2 -1
  36. tests/test_utils.py +3 -66
  37. arkindex_base_worker-0.4.0.dist-info/RECORD +0 -61
  38. arkindex_worker/worker/process.py +0 -92
  39. tests/test_elements_worker/test_element.py +0 -427
  40. tests/test_elements_worker/test_element_create_multiple.py +0 -715
  41. tests/test_elements_worker/test_element_create_single.py +0 -528
  42. tests/test_elements_worker/test_element_list_children.py +0 -969
  43. tests/test_elements_worker/test_element_list_parents.py +0 -530
  44. tests/test_elements_worker/test_entity_list_and_check.py +0 -160
  45. tests/test_elements_worker/test_process.py +0 -89
  46. tests/test_elements_worker/test_transcription_create.py +0 -873
  47. tests/test_elements_worker/test_transcription_create_with_elements.py +0 -951
  48. tests/test_elements_worker/test_transcription_list.py +0 -450
  49. tests/test_elements_worker/test_version.py +0 -60
  50. {arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/LICENSE +0 -0
  51. {arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/top_level.txt +0 -0
tests/conftest.py CHANGED
@@ -23,15 +23,10 @@ from arkindex_worker.cache import (
23
23
  init_cache_db,
24
24
  )
25
25
  from arkindex_worker.models import Artifact, Dataset, Set
26
- from arkindex_worker.worker import (
27
- BaseWorker,
28
- DatasetWorker,
29
- ElementsWorker,
30
- ProcessMode,
31
- )
26
+ from arkindex_worker.worker import BaseWorker, DatasetWorker, ElementsWorker
32
27
  from arkindex_worker.worker.dataset import DatasetState
33
28
  from arkindex_worker.worker.transcription import TextOrientation
34
- from tests import CORPUS_ID, SAMPLES_DIR
29
+ from tests import CORPUS_ID, FIXTURES_DIR, PROCESS_ID, SAMPLES_DIR
35
30
 
36
31
  __yaml_cache = {}
37
32
 
@@ -46,7 +41,7 @@ def _disable_sleep(monkeypatch):
46
41
  monkeypatch.setattr(time, "sleep", lambda x: None)
47
42
 
48
43
 
49
- @pytest.fixture
44
+ @pytest.fixture()
50
45
  def _cache_yaml(monkeypatch):
51
46
  """
52
47
  Cache all calls to yaml.safe_load in order to speedup
@@ -111,7 +106,7 @@ def _give_env_variable(monkeypatch):
111
106
  monkeypatch.setenv("ARKINDEX_WORKER_RUN_ID", "56785678-5678-5678-5678-567856785678")
112
107
 
113
108
 
114
- @pytest.fixture
109
+ @pytest.fixture()
115
110
  def _mock_worker_run_api(responses):
116
111
  """Provide a mock API response to get worker run information"""
117
112
  payload = {
@@ -180,7 +175,7 @@ def _mock_worker_run_api(responses):
180
175
  )
181
176
 
182
177
 
183
- @pytest.fixture
178
+ @pytest.fixture()
184
179
  def _mock_worker_run_no_revision_api(responses):
185
180
  """Provide a mock API response to get worker run not linked to a revision information"""
186
181
  payload = {
@@ -247,7 +242,7 @@ def _mock_worker_run_no_revision_api(responses):
247
242
  )
248
243
 
249
244
 
250
- @pytest.fixture
245
+ @pytest.fixture()
251
246
  def _mock_activity_calls(responses):
252
247
  """
253
248
  Mock responses when updating the activity state for multiple element of the same version
@@ -259,7 +254,7 @@ def _mock_activity_calls(responses):
259
254
  )
260
255
 
261
256
 
262
- @pytest.fixture
257
+ @pytest.fixture()
263
258
  def mock_elements_worker(monkeypatch, _mock_worker_run_api):
264
259
  """Build and configure an ElementsWorker with fixed CLI parameters to avoid issues with pytest"""
265
260
  monkeypatch.setattr(sys, "argv", ["worker"])
@@ -268,7 +263,7 @@ def mock_elements_worker(monkeypatch, _mock_worker_run_api):
268
263
  return worker
269
264
 
270
265
 
271
- @pytest.fixture
266
+ @pytest.fixture()
272
267
  def mock_elements_worker_read_only(monkeypatch):
273
268
  """Build and configure an ElementsWorker with fixed CLI parameters to avoid issues with pytest"""
274
269
  monkeypatch.setattr(sys, "argv", ["worker", "--dev"])
@@ -277,12 +272,14 @@ def mock_elements_worker_read_only(monkeypatch):
277
272
  return worker
278
273
 
279
274
 
280
- @pytest.fixture
275
+ @pytest.fixture()
281
276
  def mock_elements_worker_with_list(monkeypatch, responses, mock_elements_worker):
282
277
  """
283
278
  Mock a worker instance to list and retrieve a single element
284
279
  """
285
- monkeypatch.setattr(mock_elements_worker, "get_elements", lambda: ["1234-deadbeef"])
280
+ monkeypatch.setattr(
281
+ mock_elements_worker, "list_elements", lambda: ["1234-deadbeef"]
282
+ )
286
283
  responses.add(
287
284
  responses.GET,
288
285
  "http://testserver/api/v1/element/1234-deadbeef/",
@@ -296,7 +293,7 @@ def mock_elements_worker_with_list(monkeypatch, responses, mock_elements_worker)
296
293
  return mock_elements_worker
297
294
 
298
295
 
299
- @pytest.fixture
296
+ @pytest.fixture()
300
297
  def mock_cache_db(tmp_path):
301
298
  cache_path = tmp_path / "db.sqlite"
302
299
 
@@ -307,7 +304,7 @@ def mock_cache_db(tmp_path):
307
304
  return cache_path
308
305
 
309
306
 
310
- @pytest.fixture
307
+ @pytest.fixture()
311
308
  def mock_base_worker_with_cache(monkeypatch, _mock_worker_run_api):
312
309
  """Build a BaseWorker using SQLite cache, also mocking a PONOS_TASK"""
313
310
  monkeypatch.setattr(sys, "argv", ["worker"])
@@ -318,7 +315,7 @@ def mock_base_worker_with_cache(monkeypatch, _mock_worker_run_api):
318
315
  return worker
319
316
 
320
317
 
321
- @pytest.fixture
318
+ @pytest.fixture()
322
319
  def mock_elements_worker_with_cache(monkeypatch, mock_cache_db, _mock_worker_run_api):
323
320
  """Build and configure an ElementsWorker using SQLite cache with fixed CLI parameters to avoid issues with pytest"""
324
321
  monkeypatch.setattr(sys, "argv", ["worker", "-d", str(mock_cache_db)])
@@ -329,17 +326,34 @@ def mock_elements_worker_with_cache(monkeypatch, mock_cache_db, _mock_worker_run
329
326
  return worker
330
327
 
331
328
 
332
- @pytest.fixture
329
+ @pytest.fixture()
330
+ def fake_page_element():
331
+ return json.loads((FIXTURES_DIR / "page_element.json").read_text())
332
+
333
+
334
+ @pytest.fixture()
335
+ def fake_ufcn_worker_version():
336
+ return json.loads(
337
+ (FIXTURES_DIR / "ufcn_line_historical_worker_version.json").read_text()
338
+ )
339
+
340
+
341
+ @pytest.fixture()
342
+ def fake_transcriptions_small():
343
+ return json.loads((FIXTURES_DIR / "line_transcriptions_small.json").read_text())
344
+
345
+
346
+ @pytest.fixture()
333
347
  def model_file_dir():
334
348
  return SAMPLES_DIR / "model_files"
335
349
 
336
350
 
337
- @pytest.fixture
351
+ @pytest.fixture()
338
352
  def model_file_dir_with_subfolder():
339
353
  return SAMPLES_DIR / "root_folder"
340
354
 
341
355
 
342
- @pytest.fixture
356
+ @pytest.fixture()
343
357
  def fake_dummy_worker():
344
358
  api_client = MockApiClient()
345
359
  worker = ElementsWorker()
@@ -347,7 +361,7 @@ def fake_dummy_worker():
347
361
  return worker
348
362
 
349
363
 
350
- @pytest.fixture
364
+ @pytest.fixture()
351
365
  def _mock_cached_elements(mock_cache_db):
352
366
  """Insert few elements in local cache"""
353
367
  CachedElement.create(
@@ -392,7 +406,7 @@ def _mock_cached_elements(mock_cache_db):
392
406
  assert CachedElement.select().count() == 5
393
407
 
394
408
 
395
- @pytest.fixture
409
+ @pytest.fixture()
396
410
  def _mock_cached_images(mock_cache_db):
397
411
  """Insert few elements in local cache"""
398
412
  CachedImage.create(
@@ -404,7 +418,7 @@ def _mock_cached_images(mock_cache_db):
404
418
  assert CachedImage.select().count() == 1
405
419
 
406
420
 
407
- @pytest.fixture
421
+ @pytest.fixture()
408
422
  def _mock_cached_transcriptions(mock_cache_db):
409
423
  """Insert few transcriptions in local cache, on a shared element"""
410
424
  CachedElement.create(
@@ -493,7 +507,7 @@ def _mock_cached_transcriptions(mock_cache_db):
493
507
  )
494
508
 
495
509
 
496
- @pytest.fixture
510
+ @pytest.fixture()
497
511
  def mock_databases(tmp_path):
498
512
  """
499
513
  Initialize several temporary databases
@@ -576,7 +590,7 @@ def mock_databases(tmp_path):
576
590
  return out
577
591
 
578
592
 
579
- @pytest.fixture
593
+ @pytest.fixture()
580
594
  def default_dataset():
581
595
  return Dataset(
582
596
  {
@@ -594,28 +608,26 @@ def default_dataset():
594
608
  )
595
609
 
596
610
 
597
- @pytest.fixture
611
+ @pytest.fixture()
598
612
  def default_train_set(default_dataset):
599
613
  return Set(name="train", dataset=default_dataset)
600
614
 
601
615
 
602
- @pytest.fixture
616
+ @pytest.fixture()
603
617
  def mock_dataset_worker(monkeypatch, mocker, _mock_worker_run_api):
604
618
  monkeypatch.setenv("PONOS_TASK", "my_task")
605
619
  mocker.patch.object(sys, "argv", ["worker"])
606
620
 
607
621
  dataset_worker = DatasetWorker()
608
622
  dataset_worker.configure()
609
-
610
- # Update process mode
611
- dataset_worker.process_information["mode"] = ProcessMode.Dataset
623
+ dataset_worker.process_information = {"id": PROCESS_ID}
612
624
 
613
625
  assert not dataset_worker.is_read_only
614
626
 
615
627
  return dataset_worker
616
628
 
617
629
 
618
- @pytest.fixture
630
+ @pytest.fixture()
619
631
  def mock_dev_dataset_worker(mocker):
620
632
  mocker.patch.object(
621
633
  sys,
@@ -640,7 +652,7 @@ def mock_dev_dataset_worker(mocker):
640
652
  return dataset_worker
641
653
 
642
654
 
643
- @pytest.fixture
655
+ @pytest.fixture()
644
656
  def default_artifact():
645
657
  return Artifact(
646
658
  **{
tests/test_base_worker.py CHANGED
@@ -206,7 +206,6 @@ def test_configure_worker_run(mocker, responses, caplog):
206
206
  "Loaded Worker Fake worker @ 123412 from API",
207
207
  ),
208
208
  ("arkindex_worker", logging.INFO, "Loaded user configuration from WorkerRun"),
209
- ("arkindex_worker", logging.INFO, "User configuration retrieved"),
210
209
  ]
211
210
 
212
211
  assert worker.user_configuration == {"a": "b"}
@@ -285,21 +284,12 @@ def test_configure_user_configuration_defaults(mocker, responses):
285
284
 
286
285
  worker.configure()
287
286
 
287
+ assert worker.config == {"param_1": "/some/path/file.pth", "param_2": 12}
288
288
  assert worker.user_configuration == {
289
289
  "integer_parameter": 0,
290
290
  "param_3": "Animula vagula blandula",
291
291
  "param_5": True,
292
292
  }
293
- # All configurations are merged
294
- assert worker.config == {
295
- # Default config
296
- "param_1": "/some/path/file.pth",
297
- "param_2": 12,
298
- # User config
299
- "integer_parameter": 0,
300
- "param_3": "Animula vagula blandula",
301
- "param_5": True,
302
- }
303
293
 
304
294
 
305
295
  @pytest.mark.parametrize("debug", [True, False])
@@ -668,7 +658,7 @@ def test_find_extras_directory_not_found(monkeypatch, extras_path, exists, error
668
658
  def test_find_parents_file_paths(responses, mock_base_worker_with_cache, tmp_path):
669
659
  responses.add(
670
660
  responses.GET,
671
- "http://testserver/api/v1/task/my_task/",
661
+ "http://testserver/api/v1/task/my_task/from-agent/",
672
662
  status=200,
673
663
  json={"parents": ["first", "second", "third"]},
674
664
  )
@@ -686,6 +676,7 @@ def test_find_parents_file_paths(responses, mock_base_worker_with_cache, tmp_pat
686
676
  mock_base_worker_with_cache.args = mock_base_worker_with_cache.parser.parse_args()
687
677
 
688
678
  mock_base_worker_with_cache.configure()
679
+ mock_base_worker_with_cache.configure_cache()
689
680
 
690
681
  assert mock_base_worker_with_cache.find_parents_file_paths(filename) == [
691
682
  tmp_path / "first" / filename,
@@ -762,195 +753,3 @@ def test_corpus_id_set_read_only_mode(
762
753
  mock_elements_worker_read_only.configure()
763
754
 
764
755
  assert mock_elements_worker_read_only.corpus_id == corpus_id
765
-
766
-
767
- @pytest.mark.parametrize(
768
- (
769
- "wk_version_config",
770
- "wk_version_user_config",
771
- "frontend_user_config",
772
- "model_config",
773
- "expected_config",
774
- ),
775
- [
776
- ({}, {}, {}, {}, {}),
777
- # Keep parameters from worker version configuration
778
- ({"parameter": 0}, {}, {}, {}, {"parameter": 0}),
779
- # Keep parameters from worker version configuration + user_config defaults
780
- (
781
- {"parameter": 0},
782
- {
783
- "parameter2": {
784
- "type": "int",
785
- "title": "Lambda",
786
- "default": 0,
787
- "required": False,
788
- }
789
- },
790
- {},
791
- {},
792
- {"parameter": 0, "parameter2": 0},
793
- ),
794
- # Keep parameters from worker version configuration + user_config no defaults
795
- (
796
- {"parameter": 0},
797
- {
798
- "parameter2": {
799
- "type": "int",
800
- "title": "Lambda",
801
- "required": False,
802
- }
803
- },
804
- {},
805
- {},
806
- {"parameter": 0},
807
- ),
808
- # Keep parameters from worker version configuration but user_config defaults overrides
809
- (
810
- {"parameter": 0},
811
- {
812
- "parameter": {
813
- "type": "int",
814
- "title": "Lambda",
815
- "default": 1,
816
- "required": False,
817
- }
818
- },
819
- {},
820
- {},
821
- {"parameter": 1},
822
- ),
823
- # Keep parameters from worker version configuration + frontend config
824
- (
825
- {"parameter": 0},
826
- {},
827
- {"parameter2": 0},
828
- {},
829
- {"parameter": 0, "parameter2": 0},
830
- ),
831
- # Keep parameters from worker version configuration + frontend config overrides
832
- ({"parameter": 0}, {}, {"parameter": 1}, {}, {"parameter": 1}),
833
- # Keep parameters from worker version configuration + model config
834
- (
835
- {"parameter": 0},
836
- {},
837
- {},
838
- {"parameter2": 0},
839
- {"parameter": 0, "parameter2": 0},
840
- ),
841
- # Keep parameters from worker version configuration + model config overrides
842
- ({"parameter": 0}, {}, {}, {"parameter": 1}, {"parameter": 1}),
843
- # Keep parameters from worker version configuration + user_config default + model config overrides
844
- (
845
- {"parameter": 0},
846
- {
847
- "parameter": {
848
- "type": "int",
849
- "title": "Lambda",
850
- "default": 1,
851
- "required": False,
852
- }
853
- },
854
- {},
855
- {"parameter": 2},
856
- {"parameter": 2},
857
- ),
858
- # Keep parameters from worker version configuration + model config + frontend config overrides
859
- ({"parameter": 0}, {}, {"parameter": 2}, {"parameter": 1}, {"parameter": 2}),
860
- # Keep parameters from worker version configuration + user_config default + model config + frontend config overrides all
861
- (
862
- {"parameter": 0},
863
- {
864
- "parameter": {
865
- "type": "int",
866
- "title": "Lambda",
867
- "default": 1,
868
- "required": False,
869
- }
870
- },
871
- {"parameter": 3},
872
- {"parameter": 2},
873
- {"parameter": 3},
874
- ),
875
- ],
876
- )
877
- def test_worker_config_multiple_source(
878
- monkeypatch,
879
- responses,
880
- wk_version_config,
881
- wk_version_user_config,
882
- frontend_user_config,
883
- model_config,
884
- expected_config,
885
- ):
886
- # Compute WorkerRun info
887
- payload = {
888
- "id": "56785678-5678-5678-5678-567856785678",
889
- "parents": [],
890
- "worker_version": {
891
- "id": "12341234-1234-1234-1234-123412341234",
892
- "configuration": {
893
- "docker": {"image": "python:3"},
894
- "configuration": wk_version_config,
895
- "secrets": [],
896
- "user_configuration": wk_version_user_config,
897
- },
898
- "revision": {
899
- "hash": "deadbeef1234",
900
- "name": "some git revision",
901
- },
902
- "docker_image": "python:3",
903
- "docker_image_name": "python:3",
904
- "state": "created",
905
- "worker": {
906
- "id": "deadbeef-1234-5678-1234-worker",
907
- "name": "Fake worker",
908
- "slug": "fake_worker",
909
- "type": "classifier",
910
- },
911
- },
912
- "configuration": {
913
- "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
914
- "name": "Configuration entered by user",
915
- "configuration": frontend_user_config,
916
- },
917
- "model_version": {
918
- "id": "12341234-1234-1234-1234-123412341234",
919
- "name": "Model version 1337",
920
- "configuration": model_config,
921
- "model": {
922
- "id": "hahahaha-haha-haha-haha-hahahahahaha",
923
- "name": "My model",
924
- },
925
- },
926
- "process": {
927
- "name": None,
928
- "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
929
- "state": "running",
930
- "mode": "workers",
931
- "corpus": CORPUS_ID,
932
- "use_cache": False,
933
- "activity_state": "ready",
934
- "model_id": None,
935
- "train_folder_id": None,
936
- "validation_folder_id": None,
937
- "test_folder_id": None,
938
- },
939
- "summary": "Worker Fake worker @ 123412",
940
- }
941
-
942
- responses.add(
943
- responses.GET,
944
- "http://testserver/api/v1/process/workers/56785678-5678-5678-5678-567856785678/",
945
- status=200,
946
- body=json.dumps(payload),
947
- content_type="application/json",
948
- )
949
-
950
- # Create and configure a worker
951
- monkeypatch.setattr(sys, "argv", ["worker"])
952
- worker = BaseWorker()
953
- worker.configure()
954
-
955
- # Check final config
956
- assert worker.config == expected_config
tests/test_dataset_worker.py CHANGED
@@ -3,21 +3,18 @@ import uuid
3
3
  from argparse import ArgumentTypeError
4
4
 
5
5
  import pytest
6
+ from apistar.exceptions import ErrorResponse
6
7
 
7
- from arkindex.exceptions import ErrorResponse
8
8
  from arkindex_worker.models import Dataset, Set
9
- from arkindex_worker.worker.dataset import (
10
- DatasetState,
11
- MissingDatasetArchive,
12
- check_dataset_set,
13
- )
9
+ from arkindex_worker.worker import MissingDatasetArchive, check_dataset_set
10
+ from arkindex_worker.worker.dataset import DatasetState
14
11
  from tests import FIXTURES_DIR, PROCESS_ID
15
12
  from tests.test_elements_worker import BASE_API_CALLS
16
13
 
17
14
  RANDOM_UUID = uuid.uuid4()
18
15
 
19
16
 
20
- @pytest.fixture
17
+ @pytest.fixture()
21
18
  def tmp_archive(tmp_path):
22
19
  archive = tmp_path / "test_archive.tar.zst"
23
20
  archive.touch()