arkindex-base-worker 0.4.0rc6__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {arkindex_base_worker-0.4.0rc6.dist-info → arkindex_base_worker-0.5.0.dist-info}/METADATA +9 -12
  2. arkindex_base_worker-0.5.0.dist-info/RECORD +60 -0
  3. {arkindex_base_worker-0.4.0rc6.dist-info → arkindex_base_worker-0.5.0.dist-info}/WHEEL +1 -1
  4. {arkindex_base_worker-0.4.0rc6.dist-info → arkindex_base_worker-0.5.0.dist-info}/top_level.txt +1 -0
  5. arkindex_worker/__init__.py +3 -0
  6. arkindex_worker/cache.py +6 -25
  7. arkindex_worker/image.py +105 -66
  8. arkindex_worker/utils.py +2 -1
  9. arkindex_worker/worker/__init__.py +17 -31
  10. arkindex_worker/worker/base.py +16 -9
  11. arkindex_worker/worker/classification.py +36 -34
  12. arkindex_worker/worker/corpus.py +3 -3
  13. arkindex_worker/worker/dataset.py +9 -9
  14. arkindex_worker/worker/element.py +261 -231
  15. arkindex_worker/worker/entity.py +137 -206
  16. arkindex_worker/worker/image.py +3 -3
  17. arkindex_worker/worker/metadata.py +27 -38
  18. arkindex_worker/worker/task.py +9 -9
  19. arkindex_worker/worker/training.py +15 -11
  20. arkindex_worker/worker/transcription.py +77 -71
  21. examples/standalone/python/worker.py +171 -0
  22. examples/tooled/python/worker.py +50 -0
  23. tests/conftest.py +22 -36
  24. tests/test_base_worker.py +1 -1
  25. tests/test_cache.py +1 -2
  26. tests/test_dataset_worker.py +1 -1
  27. tests/test_elements_worker/test_element.py +200 -26
  28. tests/test_elements_worker/{test_entity_create.py → test_entity.py} +220 -227
  29. tests/test_elements_worker/test_metadata.py +0 -47
  30. tests/test_elements_worker/test_training.py +8 -8
  31. tests/test_elements_worker/test_worker.py +15 -14
  32. tests/test_image.py +244 -126
  33. tests/test_merge.py +0 -7
  34. tests/test_utils.py +37 -0
  35. arkindex_base_worker-0.4.0rc6.dist-info/RECORD +0 -61
  36. arkindex_worker/worker/version.py +0 -58
  37. tests/test_elements_worker/test_entity_list_and_check.py +0 -160
  38. tests/test_elements_worker/test_version.py +0 -60
  39. {arkindex_base_worker-0.4.0rc6.dist-info → arkindex_base_worker-0.5.0.dist-info/licenses}/LICENSE +0 -0
@@ -119,19 +119,6 @@ def test_create_metadata_wrong_value(mock_elements_worker):
119
119
  )
120
120
 
121
121
 
122
- def test_create_metadata_wrong_entity(mock_elements_worker):
123
- elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
124
-
125
- with pytest.raises(AssertionError, match="entity should be of type str"):
126
- mock_elements_worker.create_metadata(
127
- element=elt,
128
- type=MetaType.Location,
129
- name="Teklia",
130
- value="La Turbine, Grenoble 38000",
131
- entity=1234,
132
- )
133
-
134
-
135
122
  def test_create_metadata_api_error(responses, mock_elements_worker):
136
123
  elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
137
124
  responses.add(
@@ -188,7 +175,6 @@ def test_create_metadata(responses, mock_elements_worker):
188
175
  "type": "location",
189
176
  "name": "Teklia",
190
177
  "value": "La Turbine, Grenoble 38000",
191
- "entity_id": None,
192
178
  "worker_run_id": "56785678-5678-5678-5678-567856785678",
193
179
  }
194
180
  assert metadata_id == "12345678-1234-1234-1234-123456789123"
@@ -223,7 +209,6 @@ def test_create_metadata_cached_element(responses, mock_elements_worker_with_cac
223
209
  "type": "location",
224
210
  "name": "Teklia",
225
211
  "value": "La Turbine, Grenoble 38000",
226
- "entity_id": None,
227
212
  "worker_run_id": "56785678-5678-5678-5678-567856785678",
228
213
  }
229
214
  assert metadata_id == "12345678-1234-1234-1234-123456789123"
@@ -239,7 +224,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
239
224
  "type": MetaType.Text,
240
225
  "name": "Year",
241
226
  "value": "2024",
242
- "entity_id": "entity_id",
243
227
  },
244
228
  ]
245
229
  if batch_size > 1:
@@ -256,7 +240,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
256
240
  "name": metadata_list[0]["name"],
257
241
  "value": metadata_list[0]["value"],
258
242
  "dates": [],
259
- "entity_id": None,
260
243
  },
261
244
  {
262
245
  "id": "fake_metadata_id2",
@@ -264,7 +247,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
264
247
  "name": metadata_list[1]["name"],
265
248
  "value": metadata_list[1]["value"],
266
249
  "dates": [],
267
- "entity_id": metadata_list[1]["entity_id"],
268
250
  },
269
251
  ],
270
252
  },
@@ -284,7 +266,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
284
266
  "name": meta["name"],
285
267
  "value": meta["value"],
286
268
  "dates": [],
287
- "entity_id": meta.get("entity_id"),
288
269
  }
289
270
  ],
290
271
  },
@@ -316,7 +297,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
316
297
  first_meta = {
317
298
  **metadata_list[0],
318
299
  "type": metadata_list[0]["type"].value,
319
- "entity_id": None,
320
300
  }
321
301
  second_meta = {**metadata_list[1], "type": metadata_list[1]["type"].value}
322
302
  empty_payload = {
@@ -346,7 +326,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
346
326
  "name": metadata_list[0]["name"],
347
327
  "value": metadata_list[0]["value"],
348
328
  "dates": [],
349
- "entity_id": None,
350
329
  },
351
330
  {
352
331
  "id": "fake_metadata_id2",
@@ -354,7 +333,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
354
333
  "name": metadata_list[1]["name"],
355
334
  "value": metadata_list[1]["value"],
356
335
  "dates": [],
357
- "entity_id": metadata_list[1]["entity_id"],
358
336
  },
359
337
  ]
360
338
 
@@ -373,7 +351,6 @@ def test_create_metadata_bulk_cached_element(
373
351
  "type": MetaType.Text,
374
352
  "name": "Year",
375
353
  "value": "2024",
376
- "entity_id": "entity_id",
377
354
  },
378
355
  ]
379
356
  if batch_size > 1:
@@ -390,7 +367,6 @@ def test_create_metadata_bulk_cached_element(
390
367
  "name": metadata_list[0]["name"],
391
368
  "value": metadata_list[0]["value"],
392
369
  "dates": [],
393
- "entity_id": None,
394
370
  },
395
371
  {
396
372
  "id": "fake_metadata_id2",
@@ -398,7 +374,6 @@ def test_create_metadata_bulk_cached_element(
398
374
  "name": metadata_list[1]["name"],
399
375
  "value": metadata_list[1]["value"],
400
376
  "dates": [],
401
- "entity_id": metadata_list[1]["entity_id"],
402
377
  },
403
378
  ],
404
379
  },
@@ -418,7 +393,6 @@ def test_create_metadata_bulk_cached_element(
418
393
  "name": meta["name"],
419
394
  "value": meta["value"],
420
395
  "dates": [],
421
- "entity_id": meta.get("entity_id"),
422
396
  }
423
397
  ],
424
398
  },
@@ -450,7 +424,6 @@ def test_create_metadata_bulk_cached_element(
450
424
  first_meta = {
451
425
  **metadata_list[0],
452
426
  "type": metadata_list[0]["type"].value,
453
- "entity_id": None,
454
427
  }
455
428
  second_meta = {**metadata_list[1], "type": metadata_list[1]["type"].value}
456
429
  empty_payload = {
@@ -480,7 +453,6 @@ def test_create_metadata_bulk_cached_element(
480
453
  "name": metadata_list[0]["name"],
481
454
  "value": metadata_list[0]["value"],
482
455
  "dates": [],
483
- "entity_id": None,
484
456
  },
485
457
  {
486
458
  "id": "fake_metadata_id2",
@@ -488,7 +460,6 @@ def test_create_metadata_bulk_cached_element(
488
460
  "name": metadata_list[1]["name"],
489
461
  "value": metadata_list[1]["value"],
490
462
  "dates": [],
491
- "entity_id": metadata_list[1]["entity_id"],
492
463
  },
493
464
  ]
494
465
 
@@ -552,23 +523,6 @@ def test_create_metadata_bulk_wrong_value(mock_elements_worker, wrong_value):
552
523
  )
553
524
 
554
525
 
555
- @pytest.mark.parametrize("wrong_entity", [[1, 2, 3, 4], 1234, 12.5])
556
- def test_create_metadata_bulk_wrong_entity(mock_elements_worker, wrong_entity):
557
- element = Element({"id": "fake_element_id"})
558
- wrong_metadata_list = [
559
- {
560
- "type": MetaType.Text,
561
- "name": "fake_name",
562
- "value": "fake_value",
563
- "entity_id": wrong_entity,
564
- }
565
- ]
566
- with pytest.raises(AssertionError, match="entity_id should be None or a str"):
567
- mock_elements_worker.create_metadata_bulk(
568
- element=element, metadata_list=wrong_metadata_list
569
- )
570
-
571
-
572
526
  def test_create_metadata_bulk_api_error(responses, mock_elements_worker):
573
527
  element = Element({"id": "12341234-1234-1234-1234-123412341234"})
574
528
  metadata_list = [
@@ -576,7 +530,6 @@ def test_create_metadata_bulk_api_error(responses, mock_elements_worker):
576
530
  "type": MetaType.Text,
577
531
  "name": "fake_name",
578
532
  "value": "fake_value",
579
- "entity_id": "fake_entity_id",
580
533
  }
581
534
  ]
582
535
  responses.add(
@@ -8,7 +8,7 @@ from arkindex_worker.worker import BaseWorker
8
8
  from arkindex_worker.worker.training import TrainingMixin, create_archive
9
9
 
10
10
 
11
- @pytest.fixture()
11
+ @pytest.fixture
12
12
  def mock_training_worker(monkeypatch):
13
13
  class TrainingWorker(BaseWorker, TrainingMixin):
14
14
  """
@@ -22,7 +22,7 @@ def mock_training_worker(monkeypatch):
22
22
  return training_worker
23
23
 
24
24
 
25
- @pytest.fixture()
25
+ @pytest.fixture
26
26
  def default_model_version():
27
27
  return {
28
28
  "id": "model_version_id",
@@ -51,9 +51,9 @@ def test_create_archive(model_file_dir):
51
51
  archive_hash,
52
52
  ):
53
53
  assert zst_archive_path.exists(), "The archive was not created"
54
- assert (
55
- hash == "c5aedde18a768757351068b840c8c8f9"
56
- ), "Hash was not properly computed"
54
+ assert hash == "c5aedde18a768757351068b840c8c8f9", (
55
+ "Hash was not properly computed"
56
+ )
57
57
  assert 300 < size < 700
58
58
 
59
59
  assert not zst_archive_path.exists(), "Auto removal failed"
@@ -69,9 +69,9 @@ def test_create_archive_with_subfolder(model_file_dir_with_subfolder):
69
69
  archive_hash,
70
70
  ):
71
71
  assert zst_archive_path.exists(), "The archive was not created"
72
- assert (
73
- hash == "3e453881404689e6e125144d2db3e605"
74
- ), "Hash was not properly computed"
72
+ assert hash == "3e453881404689e6e125144d2db3e605", (
73
+ "Hash was not properly computed"
74
+ )
75
75
  assert 300 < size < 1500
76
76
 
77
77
  assert not zst_archive_path.exists(), "Auto removal failed"
@@ -1,4 +1,5 @@
1
1
  import json
2
+ import logging
2
3
  import sys
3
4
  from argparse import Namespace
4
5
  from uuid import UUID
@@ -685,7 +686,7 @@ def test_run_cache(monkeypatch, mocker, mock_elements_worker_with_cache):
685
686
 
686
687
 
687
688
  def test_start_activity_conflict(
688
- monkeypatch, responses, mocker, mock_elements_worker_with_list
689
+ monkeypatch, responses, mock_elements_worker_with_list, caplog
689
690
  ):
690
691
  # Disable second configure call from run()
691
692
  monkeypatch.setattr(mock_elements_worker_with_list, "configure", lambda: None)
@@ -700,9 +701,6 @@ def test_start_activity_conflict(
700
701
  content="Either this activity does not exists or this state is not allowed.",
701
702
  ),
702
703
  )
703
- from arkindex_worker.worker import logger
704
-
705
- logger.info = mocker.MagicMock()
706
704
 
707
705
  mock_elements_worker_with_list.run()
708
706
 
@@ -716,14 +714,15 @@ def test_start_activity_conflict(
716
714
  "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
717
715
  ),
718
716
  ]
719
- assert logger.info.call_args_list[:2] == [
720
- mocker.call("Processing page Test Page n°1 (1234-deadbeef) (1/1)"),
721
- mocker.call("Skipping element 1234-deadbeef as it was already processed"),
717
+ assert [(record.levelno, record.message) for record in caplog.records] == [
718
+ (logging.INFO, "Processing page Test Page n°1 (1234-deadbeef) (1/1)"),
719
+ (logging.INFO, "Skipping element 1234-deadbeef as it was already processed"),
720
+ (logging.INFO, "Ran on 1 element: 1 completed, 0 failed"),
722
721
  ]
723
722
 
724
723
 
725
724
  def test_start_activity_error(
726
- monkeypatch, responses, mocker, mock_elements_worker_with_list
725
+ monkeypatch, responses, mock_elements_worker_with_list, caplog
727
726
  ):
728
727
  # Disable second configure call from run()
729
728
  monkeypatch.setattr(mock_elements_worker_with_list, "configure", lambda: None)
@@ -732,11 +731,8 @@ def test_start_activity_error(
732
731
  responses.add(
733
732
  responses.PUT,
734
733
  "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
735
- body=Exception("A wild Petilil appears !"),
734
+ body=Exception("A wild Petilil appears!"),
736
735
  )
737
- from arkindex_worker.worker import logger
738
-
739
- logger.error = mocker.MagicMock()
740
736
 
741
737
  with pytest.raises(SystemExit):
742
738
  mock_elements_worker_with_list.run()
@@ -755,8 +751,13 @@ def test_start_activity_error(
755
751
  "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
756
752
  ),
757
753
  ]
758
- assert logger.error.call_args_list == [
759
- mocker.call("Ran on 1 element: 0 completed, 1 failed")
754
+ assert [(record.levelno, record.message) for record in caplog.records] == [
755
+ (logging.INFO, "Processing page Test Page n°1 (1234-deadbeef) (1/1)"),
756
+ (
757
+ logging.WARNING,
758
+ "Failed running worker on element 1234-deadbeef: Exception('A wild Petilil appears!')",
759
+ ),
760
+ (logging.ERROR, "Ran on 1 element: 0 completed, 1 failed"),
760
761
  ]
761
762
 
762
763