arkindex-base-worker 0.5.0b3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -119,19 +119,6 @@ def test_create_metadata_wrong_value(mock_elements_worker):
119
119
  )
120
120
 
121
121
 
122
- def test_create_metadata_wrong_entity(mock_elements_worker):
123
- elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
124
-
125
- with pytest.raises(AssertionError, match="entity should be of type str"):
126
- mock_elements_worker.create_metadata(
127
- element=elt,
128
- type=MetaType.Location,
129
- name="Teklia",
130
- value="La Turbine, Grenoble 38000",
131
- entity=1234,
132
- )
133
-
134
-
135
122
  def test_create_metadata_api_error(responses, mock_elements_worker):
136
123
  elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
137
124
  responses.add(
@@ -188,7 +175,6 @@ def test_create_metadata(responses, mock_elements_worker):
188
175
  "type": "location",
189
176
  "name": "Teklia",
190
177
  "value": "La Turbine, Grenoble 38000",
191
- "entity_id": None,
192
178
  "worker_run_id": "56785678-5678-5678-5678-567856785678",
193
179
  }
194
180
  assert metadata_id == "12345678-1234-1234-1234-123456789123"
@@ -223,7 +209,6 @@ def test_create_metadata_cached_element(responses, mock_elements_worker_with_cac
223
209
  "type": "location",
224
210
  "name": "Teklia",
225
211
  "value": "La Turbine, Grenoble 38000",
226
- "entity_id": None,
227
212
  "worker_run_id": "56785678-5678-5678-5678-567856785678",
228
213
  }
229
214
  assert metadata_id == "12345678-1234-1234-1234-123456789123"
@@ -239,7 +224,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
239
224
  "type": MetaType.Text,
240
225
  "name": "Year",
241
226
  "value": "2024",
242
- "entity_id": "entity_id",
243
227
  },
244
228
  ]
245
229
  if batch_size > 1:
@@ -256,7 +240,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
256
240
  "name": metadata_list[0]["name"],
257
241
  "value": metadata_list[0]["value"],
258
242
  "dates": [],
259
- "entity_id": None,
260
243
  },
261
244
  {
262
245
  "id": "fake_metadata_id2",
@@ -264,7 +247,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
264
247
  "name": metadata_list[1]["name"],
265
248
  "value": metadata_list[1]["value"],
266
249
  "dates": [],
267
- "entity_id": metadata_list[1]["entity_id"],
268
250
  },
269
251
  ],
270
252
  },
@@ -284,7 +266,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
284
266
  "name": meta["name"],
285
267
  "value": meta["value"],
286
268
  "dates": [],
287
- "entity_id": meta.get("entity_id"),
288
269
  }
289
270
  ],
290
271
  },
@@ -316,7 +297,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
316
297
  first_meta = {
317
298
  **metadata_list[0],
318
299
  "type": metadata_list[0]["type"].value,
319
- "entity_id": None,
320
300
  }
321
301
  second_meta = {**metadata_list[1], "type": metadata_list[1]["type"].value}
322
302
  empty_payload = {
@@ -346,7 +326,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
346
326
  "name": metadata_list[0]["name"],
347
327
  "value": metadata_list[0]["value"],
348
328
  "dates": [],
349
- "entity_id": None,
350
329
  },
351
330
  {
352
331
  "id": "fake_metadata_id2",
@@ -354,7 +333,6 @@ def test_create_metadata_bulk(batch_size, responses, mock_elements_worker):
354
333
  "name": metadata_list[1]["name"],
355
334
  "value": metadata_list[1]["value"],
356
335
  "dates": [],
357
- "entity_id": metadata_list[1]["entity_id"],
358
336
  },
359
337
  ]
360
338
 
@@ -373,7 +351,6 @@ def test_create_metadata_bulk_cached_element(
373
351
  "type": MetaType.Text,
374
352
  "name": "Year",
375
353
  "value": "2024",
376
- "entity_id": "entity_id",
377
354
  },
378
355
  ]
379
356
  if batch_size > 1:
@@ -390,7 +367,6 @@ def test_create_metadata_bulk_cached_element(
390
367
  "name": metadata_list[0]["name"],
391
368
  "value": metadata_list[0]["value"],
392
369
  "dates": [],
393
- "entity_id": None,
394
370
  },
395
371
  {
396
372
  "id": "fake_metadata_id2",
@@ -398,7 +374,6 @@ def test_create_metadata_bulk_cached_element(
398
374
  "name": metadata_list[1]["name"],
399
375
  "value": metadata_list[1]["value"],
400
376
  "dates": [],
401
- "entity_id": metadata_list[1]["entity_id"],
402
377
  },
403
378
  ],
404
379
  },
@@ -418,7 +393,6 @@ def test_create_metadata_bulk_cached_element(
418
393
  "name": meta["name"],
419
394
  "value": meta["value"],
420
395
  "dates": [],
421
- "entity_id": meta.get("entity_id"),
422
396
  }
423
397
  ],
424
398
  },
@@ -450,7 +424,6 @@ def test_create_metadata_bulk_cached_element(
450
424
  first_meta = {
451
425
  **metadata_list[0],
452
426
  "type": metadata_list[0]["type"].value,
453
- "entity_id": None,
454
427
  }
455
428
  second_meta = {**metadata_list[1], "type": metadata_list[1]["type"].value}
456
429
  empty_payload = {
@@ -480,7 +453,6 @@ def test_create_metadata_bulk_cached_element(
480
453
  "name": metadata_list[0]["name"],
481
454
  "value": metadata_list[0]["value"],
482
455
  "dates": [],
483
- "entity_id": None,
484
456
  },
485
457
  {
486
458
  "id": "fake_metadata_id2",
@@ -488,7 +460,6 @@ def test_create_metadata_bulk_cached_element(
488
460
  "name": metadata_list[1]["name"],
489
461
  "value": metadata_list[1]["value"],
490
462
  "dates": [],
491
- "entity_id": metadata_list[1]["entity_id"],
492
463
  },
493
464
  ]
494
465
 
@@ -552,23 +523,6 @@ def test_create_metadata_bulk_wrong_value(mock_elements_worker, wrong_value):
552
523
  )
553
524
 
554
525
 
555
- @pytest.mark.parametrize("wrong_entity", [[1, 2, 3, 4], 1234, 12.5])
556
- def test_create_metadata_bulk_wrong_entity(mock_elements_worker, wrong_entity):
557
- element = Element({"id": "fake_element_id"})
558
- wrong_metadata_list = [
559
- {
560
- "type": MetaType.Text,
561
- "name": "fake_name",
562
- "value": "fake_value",
563
- "entity_id": wrong_entity,
564
- }
565
- ]
566
- with pytest.raises(AssertionError, match="entity_id should be None or a str"):
567
- mock_elements_worker.create_metadata_bulk(
568
- element=element, metadata_list=wrong_metadata_list
569
- )
570
-
571
-
572
526
  def test_create_metadata_bulk_api_error(responses, mock_elements_worker):
573
527
  element = Element({"id": "12341234-1234-1234-1234-123412341234"})
574
528
  metadata_list = [
@@ -576,7 +530,6 @@ def test_create_metadata_bulk_api_error(responses, mock_elements_worker):
576
530
  "type": MetaType.Text,
577
531
  "name": "fake_name",
578
532
  "value": "fake_value",
579
- "entity_id": "fake_entity_id",
580
533
  }
581
534
  ]
582
535
  responses.add(
@@ -685,6 +685,112 @@ def test_run_cache(monkeypatch, mocker, mock_elements_worker_with_cache):
685
685
  ]
686
686
 
687
687
 
688
+ def test_run_consuming_worker_activities(
689
+ monkeypatch,
690
+ mock_elements_worker_consume_wa,
691
+ responses,
692
+ caplog,
693
+ ):
694
+ """Check the consuming worker activities runtime uses StartWorkerActivity + UpdateWorkerActivity"""
695
+ # Disable second configure call from run()
696
+ monkeypatch.setattr(mock_elements_worker_consume_wa, "configure", lambda: None)
697
+
698
+ assert mock_elements_worker_consume_wa.is_read_only is False
699
+
700
+ # Provide 2 worker activities to run and the corresponding update call
701
+ # and 2 element details response
702
+ for i, elt_id in enumerate(("page_1", "page_2"), 1):
703
+ responses.add(
704
+ responses.POST,
705
+ "http://testserver/api/v1/process/start-activity/",
706
+ status=200,
707
+ json={
708
+ "id": elt_id,
709
+ "type_id": "page-aaaa-aaaa-aaaa-aaaaaaaaaaaa", # Element type provided by mock corpus
710
+ "name": f"Page n°{i}",
711
+ },
712
+ )
713
+ responses.add(
714
+ responses.PUT,
715
+ "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
716
+ status=200,
717
+ )
718
+ responses.add(
719
+ responses.GET,
720
+ f"http://testserver/api/v1/element/{elt_id}/",
721
+ status=200,
722
+ json={
723
+ "id": elt_id,
724
+ "type": "page",
725
+ "name": f"Page n°{i}",
726
+ },
727
+ )
728
+
729
+ # Then a 404 to stop iterating
730
+ responses.add(
731
+ responses.POST,
732
+ "http://testserver/api/v1/process/start-activity/",
733
+ status=404,
734
+ )
735
+
736
+ # Simply run the process
737
+ mock_elements_worker_consume_wa.run()
738
+
739
+ # We call twice configure in the conftest
740
+ assert len(responses.calls) == len(BASE_API_CALLS) * 2 + 7
741
+ assert [
742
+ (call.request.method, call.request.url) for call in responses.calls
743
+ ] == BASE_API_CALLS * 2 + [
744
+ (
745
+ "POST",
746
+ "http://testserver/api/v1/process/start-activity/",
747
+ ),
748
+ (
749
+ "GET",
750
+ "http://testserver/api/v1/element/page_1/",
751
+ ),
752
+ (
753
+ "PUT",
754
+ "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
755
+ ),
756
+ (
757
+ "POST",
758
+ "http://testserver/api/v1/process/start-activity/",
759
+ ),
760
+ (
761
+ "GET",
762
+ "http://testserver/api/v1/element/page_2/",
763
+ ),
764
+ (
765
+ "PUT",
766
+ "http://testserver/api/v1/workers/versions/56785678-5678-5678-5678-567856785678/activity/",
767
+ ),
768
+ (
769
+ "POST",
770
+ "http://testserver/api/v1/process/start-activity/",
771
+ ),
772
+ ]
773
+
774
+ assert [(record.levelno, record.message) for record in caplog.records] == [
775
+ (
776
+ logging.INFO,
777
+ "Using StartWorkerActivity instead of reading init_elements JSON file",
778
+ ),
779
+ (
780
+ logging.INFO,
781
+ "Processing page Page n°1 (page_1) (n°1)",
782
+ ),
783
+ (
784
+ logging.INFO,
785
+ "Processing page Page n°2 (page_2) (n°2)",
786
+ ),
787
+ (
788
+ logging.INFO,
789
+ "Ran on 2 elements: 2 completed, 0 failed",
790
+ ),
791
+ ]
792
+
793
+
688
794
  def test_start_activity_conflict(
689
795
  monkeypatch, responses, mock_elements_worker_with_list, caplog
690
796
  ):
tests/test_image.py CHANGED
@@ -113,21 +113,37 @@ def test_update_pillow_image_size_limit(max_image_pixels, expected_image_pixels)
113
113
  assert Image.MAX_IMAGE_PIXELS == MAX_IMAGE_PIXELS
114
114
 
115
115
 
116
- def test_download_tiles(responses):
116
+ @pytest.mark.parametrize(
117
+ ("id_key", "resize"),
118
+ [
119
+ # IIIF version 2
120
+ ("@id", "full"),
121
+ # IIIF version 3
122
+ ("id", "max"),
123
+ ],
124
+ )
125
+ def test_download_tiles(responses, id_key, resize):
117
126
  expected = Image.open(FULL_IMAGE).convert("RGB")
118
127
  tile_bytes = TILE.read_bytes()
119
128
 
120
129
  responses.add(
121
130
  responses.GET,
122
131
  "http://nowhere/info.json",
123
- json={"width": 543, "height": 720, "tiles": [{"width": 181, "height": 240}]},
132
+ json={
133
+ id_key: "http://nowhere",
134
+ "width": 543,
135
+ "height": 720,
136
+ "tiles": [
137
+ {"width": 181, "height": 240},
138
+ ],
139
+ },
124
140
  )
125
141
 
126
142
  for x in (0, 181, 362):
127
143
  for y in (0, 240, 480):
128
144
  responses.add(
129
145
  responses.GET,
130
- f"http://nowhere/{x},{y},181,240/full/0/default.jpg",
146
+ f"http://nowhere/{x},{y},181,240/{resize}/0/default.jpg",
131
147
  body=tile_bytes,
132
148
  )
133
149
 
tests/test_merge.py CHANGED
@@ -7,7 +7,6 @@ from arkindex_worker.cache import (
7
7
  SQL_VERSION,
8
8
  CachedClassification,
9
9
  CachedElement,
10
- CachedEntity,
11
10
  CachedImage,
12
11
  CachedTranscription,
13
12
  CachedTranscriptionEntity,
@@ -85,7 +84,6 @@ def test_merge_databases(
85
84
  assert CachedElement.select().count() == 0
86
85
  assert CachedTranscription.select().count() == 0
87
86
  assert CachedClassification.select().count() == 0
88
- assert CachedEntity.select().count() == 0
89
87
  assert CachedTranscriptionEntity.select().count() == 0
90
88
 
91
89
  # Retrieve parents databases paths
@@ -103,7 +101,6 @@ def test_merge_databases(
103
101
  assert CachedElement.select().count() == len(expected_elements)
104
102
  assert CachedTranscription.select().count() == len(expected_transcriptions)
105
103
  assert CachedClassification.select().count() == 0
106
- assert CachedEntity.select().count() == 0
107
104
  assert CachedTranscriptionEntity.select().count() == 0
108
105
  assert [
109
106
  e.id for e in CachedElement.select().order_by("id")
@@ -124,7 +121,6 @@ def test_merge_chunk(mock_databases, tmp_path):
124
121
  assert CachedElement.select().count() == 0
125
122
  assert CachedTranscription.select().count() == 0
126
123
  assert CachedClassification.select().count() == 0
127
- assert CachedEntity.select().count() == 0
128
124
  assert CachedTranscriptionEntity.select().count() == 0
129
125
 
130
126
  # Check filenames
@@ -144,7 +140,6 @@ def test_merge_chunk(mock_databases, tmp_path):
144
140
  assert CachedElement.select().count() == 3
145
141
  assert CachedTranscription.select().count() == 0
146
142
  assert CachedClassification.select().count() == 0
147
- assert CachedEntity.select().count() == 0
148
143
  assert CachedTranscriptionEntity.select().count() == 0
149
144
  assert [e.id for e in CachedElement.select().order_by("id")] == [
150
145
  UUID("42424242-4242-4242-4242-424242424242"),
@@ -171,7 +166,6 @@ def test_merge_from_worker(
171
166
  assert CachedElement.select().count() == 0
172
167
  assert CachedTranscription.select().count() == 0
173
168
  assert CachedClassification.select().count() == 0
174
- assert CachedEntity.select().count() == 0
175
169
  assert CachedTranscriptionEntity.select().count() == 0
176
170
 
177
171
  # Configure worker with a specific data directory
@@ -191,7 +185,6 @@ def test_merge_from_worker(
191
185
  assert CachedElement.select().count() == 3
192
186
  assert CachedTranscription.select().count() == 1
193
187
  assert CachedClassification.select().count() == 0
194
- assert CachedEntity.select().count() == 0
195
188
  assert CachedTranscriptionEntity.select().count() == 0
196
189
  assert [e.id for e in CachedElement.select().order_by("id")] == [
197
190
  UUID("12341234-1234-1234-1234-123412341234"),
@@ -0,0 +1,81 @@
1
+ def test_simple_configuration(mock_base_worker_modern_conf, responses):
2
+ # Provide the full configuration directly from the worker run
3
+ responses.add(
4
+ responses.GET,
5
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
6
+ status=200,
7
+ json={"configuration": [{"key": "some_key", "value": "test", "secret": False}]},
8
+ )
9
+
10
+ mock_base_worker_modern_conf.configure()
11
+
12
+ assert mock_base_worker_modern_conf.config == {"some_key": "test"}
13
+ assert (
14
+ mock_base_worker_modern_conf.user_configuration
15
+ == mock_base_worker_modern_conf.config
16
+ )
17
+ assert mock_base_worker_modern_conf.secrets == {}
18
+
19
+
20
+ def test_empty(mock_base_worker_modern_conf, responses):
21
+ # Provide the full configuration directly from the worker run
22
+ responses.add(
23
+ responses.GET,
24
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
25
+ status=200,
26
+ json={"configuration": []},
27
+ )
28
+
29
+ mock_base_worker_modern_conf.configure()
30
+
31
+ assert mock_base_worker_modern_conf.config == {}
32
+ assert (
33
+ mock_base_worker_modern_conf.user_configuration
34
+ == mock_base_worker_modern_conf.config
35
+ )
36
+ assert mock_base_worker_modern_conf.secrets == {}
37
+
38
+
39
+ def test_with_secrets(mock_base_worker_modern_conf, responses):
40
+ # Provide the full configuration directly from the worker run
41
+ responses.add(
42
+ responses.GET,
43
+ "http://testserver/api/v1/workers/runs/56785678-5678-5678-5678-567856785678/configuration/",
44
+ status=200,
45
+ json={
46
+ "configuration": [
47
+ {"key": "some_key", "value": "test", "secret": False},
48
+ {
49
+ "key": "a_secret",
50
+ "value": "471b9e64-29af-48dc-8bda-1a64a2da0c12",
51
+ "secret": True,
52
+ },
53
+ ]
54
+ },
55
+ )
56
+
57
+ # Provide a secret value
58
+ responses.add(
59
+ responses.GET,
60
+ "http://testserver/api/v1/secret/471b9e64-29af-48dc-8bda-1a64a2da0c12",
61
+ status=200,
62
+ json={
63
+ "id": "471b9e64-29af-48dc-8bda-1a64a2da0c12",
64
+ "name": "a_secret",
65
+ "content": "My super duper secret value",
66
+ },
67
+ )
68
+
69
+ mock_base_worker_modern_conf.configure()
70
+
71
+ assert mock_base_worker_modern_conf.config == {
72
+ "a_secret": "My super duper secret value",
73
+ "some_key": "test",
74
+ }
75
+ assert (
76
+ mock_base_worker_modern_conf.user_configuration
77
+ == mock_base_worker_modern_conf.config
78
+ )
79
+ assert mock_base_worker_modern_conf.secrets == {
80
+ "a_secret": "My super duper secret value"
81
+ }
tests/test_utils.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import zipfile
2
3
 
3
4
  import pytest
4
5
 
@@ -7,6 +8,7 @@ from arkindex_worker.utils import (
7
8
  DEFAULT_BATCH_SIZE,
8
9
  batch_publication,
9
10
  close_delete_file,
11
+ create_zip_archive,
10
12
  extract_tar_zst_archive,
11
13
  parse_source_id,
12
14
  )
@@ -118,3 +120,43 @@ def test_batch_publication_decorator_alongside_unsupported_cache(caplog):
118
120
  "This API helper `custom_publication_in_batches_without_cache` did not update the cache database",
119
121
  ),
120
122
  ]
123
+
124
+
125
+ def test_zip_archive():
126
+ # Create zip archive from fixtures
127
+ _, archive = create_zip_archive(FIXTURES_DIR / "extract_parent_archives/expected")
128
+
129
+ # Check the files in the archive
130
+ with zipfile.ZipFile(archive, mode="r") as f:
131
+ assert sorted(f.namelist()) == [
132
+ "test/",
133
+ "test/images/",
134
+ "test/images/f2649ce7-333e-44d2-ae73-387f18aad1f6.png",
135
+ "test/labels/",
136
+ "test/labels/f2649ce7-333e-44d2-ae73-387f18aad1f6.png",
137
+ "test/labels_json/",
138
+ "test/labels_json/f2649ce7-333e-44d2-ae73-387f18aad1f6.json",
139
+ "train/",
140
+ "train/images/",
141
+ "train/images/98115546-df07-448c-a2f0-34aa24789b77.png",
142
+ "train/images/ebeaa451-9287-4df7-9c40-07eb25cadb78.png",
143
+ "train/labels/",
144
+ "train/labels/98115546-df07-448c-a2f0-34aa24789b77.png",
145
+ "train/labels/ebeaa451-9287-4df7-9c40-07eb25cadb78.png",
146
+ "train/labels_json/",
147
+ "train/labels_json/98115546-df07-448c-a2f0-34aa24789b77.json",
148
+ "train/labels_json/ebeaa451-9287-4df7-9c40-07eb25cadb78.json",
149
+ "val/",
150
+ "val/images/",
151
+ "val/images/2987176d-4338-40f2-90d9-6d2cb4fd4a00.png",
152
+ "val/images/e3f91312-9201-45b7-9c32-e04a97ff1334.png",
153
+ "val/labels/",
154
+ "val/labels/2987176d-4338-40f2-90d9-6d2cb4fd4a00.png",
155
+ "val/labels/e3f91312-9201-45b7-9c32-e04a97ff1334.png",
156
+ "val/labels_json/",
157
+ "val/labels_json/2987176d-4338-40f2-90d9-6d2cb4fd4a00.json",
158
+ "val/labels_json/e3f91312-9201-45b7-9c32-e04a97ff1334.json",
159
+ ]
160
+
161
+ # Cleanup
162
+ archive.unlink()