arkindex-base-worker 0.4.0rc5__py3-none-any.whl → 0.4.0rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arkindex-base-worker
3
- Version: 0.4.0rc5
3
+ Version: 0.4.0rc6
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -41,19 +41,19 @@ Classifier: Programming Language :: Python :: 3.12
41
41
  Requires-Python: >=3.10
42
42
  Description-Content-Type: text/markdown
43
43
  License-File: LICENSE
44
- Requires-Dist: humanize==4.10.0
44
+ Requires-Dist: humanize==4.11.0
45
45
  Requires-Dist: peewee~=3.17
46
- Requires-Dist: Pillow==10.4.0
47
- Requires-Dist: python-gnupg==0.5.2
46
+ Requires-Dist: Pillow==11.0.0
47
+ Requires-Dist: python-gnupg==0.5.3
48
48
  Requires-Dist: shapely==2.0.6
49
49
  Requires-Dist: teklia-toolbox==0.1.7b1
50
- Requires-Dist: zstandard==0.22.0
50
+ Requires-Dist: zstandard==0.23.0
51
51
  Provides-Extra: docs
52
- Requires-Dist: black==24.4.2; extra == "docs"
53
- Requires-Dist: mkdocs-material==9.5.33; extra == "docs"
54
- Requires-Dist: mkdocstrings-python==1.11.1; extra == "docs"
52
+ Requires-Dist: black==24.10.0; extra == "docs"
53
+ Requires-Dist: mkdocs-material==9.5.48; extra == "docs"
54
+ Requires-Dist: mkdocstrings-python==1.12.2; extra == "docs"
55
55
  Provides-Extra: tests
56
- Requires-Dist: pytest==8.3.2; extra == "tests"
56
+ Requires-Dist: pytest==8.3.4; extra == "tests"
57
57
  Requires-Dist: pytest-mock==3.14.0; extra == "tests"
58
58
  Requires-Dist: pytest-responses==0.5.1; extra == "tests"
59
59
 
@@ -3,7 +3,7 @@ arkindex_worker/cache.py,sha256=qTblc_zKdYC47Wip6_O9Jf5qBkQW2ozQQrg-nsx1WuY,1122
3
3
  arkindex_worker/image.py,sha256=oEgVCrSHiGh3D5-UXfM6PvT17TttSxC0115irpvB3Dw,18581
4
4
  arkindex_worker/models.py,sha256=bPQzGZNs5a6z6DEcygsa8T33VOqPlMUbwKzHqlKzwbw,9923
5
5
  arkindex_worker/utils.py,sha256=q1EeLdC6ebYIH-C0LOAqw2cNpjCjVoP-Vbr-39mF4w0,9884
6
- arkindex_worker/worker/__init__.py,sha256=m255Cle3nE_FtAXbbJj_v1aS9ClT6hpDlrUFXTPiqkI,15985
6
+ arkindex_worker/worker/__init__.py,sha256=0_YHeOe31KR_8ynbnYMIMwnSQTVbKkkeLGmnlTMhFx0,16234
7
7
  arkindex_worker/worker/base.py,sha256=7Pmw-UQSxV-xkW8NO5cXsxJ8W8szzyppMaNjq_az81A,19844
8
8
  arkindex_worker/worker/classification.py,sha256=zECSNzGCZFzoPoDVZN4kuGYRNLzMQLBaRt3q1jnBSaA,10952
9
9
  arkindex_worker/worker/corpus.py,sha256=0TQFOwZ6Te-CZi6lgkZY1wzyJ5wO9LAmcVQtqHvZpPk,2291
@@ -12,7 +12,7 @@ arkindex_worker/worker/element.py,sha256=Qvvq9kJnAHNATHW7zi96eIY1x-0MsR-T5rrSJg6
12
12
  arkindex_worker/worker/entity.py,sha256=s5wjX6_JfTyk4qfMoV0OWfOXUx6T-9WpOiEpaoaCEFM,14808
13
13
  arkindex_worker/worker/image.py,sha256=t_Az6IGnj0EZyvcA4XxfPikOUjn_pztgsyxTkFZhaXU,621
14
14
  arkindex_worker/worker/metadata.py,sha256=VRajtd2kaBvar9GercX4knvR6l1WFYjoCdJWU9ccKgk,7291
15
- arkindex_worker/worker/process.py,sha256=IAJaiiCizK4vpPmMQD0yYSB6IIoyy7yU-5JKaiuPb7o,1073
15
+ arkindex_worker/worker/process.py,sha256=9TEHpMcBax1wc6PrWMMrdXe2uNfqyVj7n_dAYZRBGnY,1854
16
16
  arkindex_worker/worker/task.py,sha256=r1j7_qbdNu2Z8H8HbGzO3P3qdx-2N1pBbUPFDca0rqg,1519
17
17
  arkindex_worker/worker/training.py,sha256=H8FmCdzGcDW-WMMwcgvmZPlN5tPHwGo0BXn12qmzj8g,10875
18
18
  arkindex_worker/worker/transcription.py,sha256=52RY9kYsiR1sz9FxOigyo12Ker3VDbQ4U42gK9DpR3g,21146
@@ -41,20 +41,21 @@ tests/test_elements_worker/test_entity_create.py,sha256=9Tjr9KA2yo44VFV283q_cs6X
41
41
  tests/test_elements_worker/test_entity_list_and_check.py,sha256=ENBLaqbXlRUDbHRvQla3080a0HJltrWAPYWNohUA9NU,4992
42
42
  tests/test_elements_worker/test_image.py,sha256=BljMNKgec_9a5bzNzFpYZIvSbuvwsWDfdqLHVJaTa7M,2079
43
43
  tests/test_elements_worker/test_metadata.py,sha256=Xfggy-vxw5DZ3hFKx3sB7OYb2d1tu1RiNK8fvKJIaBs,22294
44
+ tests/test_elements_worker/test_process.py,sha256=y4RoVhPfyHzR795fw7-_FXElBcKo3fy4Ew_HI-kxJic,3088
44
45
  tests/test_elements_worker/test_task.py,sha256=wTUWqN9UhfKmJn3IcFY75EW4I1ulRhisflmY1kmP47s,5574
45
46
  tests/test_elements_worker/test_training.py,sha256=Qxi9EzGr_uKcn2Fh5aE6jNrq1K8QKLiOiSew4upASPs,8721
46
47
  tests/test_elements_worker/test_transcription_create.py,sha256=yznO9B_BVsOR0Z_VY5ZL8gJp0ZPCz_4sPUs5dXtixAg,29281
47
48
  tests/test_elements_worker/test_transcription_create_with_elements.py,sha256=tmcyglgssEqMnt1Mdy_u6X1m2wgLWTo_HdWst3GrK2k,33056
48
49
  tests/test_elements_worker/test_transcription_list.py,sha256=ikz7HYPCoQWTdTRCd382SB-y-T2BbigPLlIcx5Eow-I,15324
49
50
  tests/test_elements_worker/test_version.py,sha256=xqCgcgukTFJzkMgYfQG-8mTbu0o2fdYjWC07FktThfw,2125
50
- tests/test_elements_worker/test_worker.py,sha256=fnFwkcDb6tx5i7lbelifeHD-BrGz6o5UBEojPCdtuAo,23474
51
+ tests/test_elements_worker/test_worker.py,sha256=pLUgjyrrXrzVD6T-kdH1ppk5Yn_iDuI8JdFGweTEMXE,25156
51
52
  worker-demo/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
53
  worker-demo/tests/conftest.py,sha256=XzNMNeg6pmABUAH8jN6eZTlZSFGLYjS3-DTXjiRN6Yc,1002
53
54
  worker-demo/tests/test_worker.py,sha256=3DLd4NRK4bfyatG5P_PK4k9P9tJHx9XQq5_ryFEEFVg,304
54
55
  worker-demo/worker_demo/__init__.py,sha256=2BPomV8ZMNf3YXJgloatKeHQCE6QOkwmsHGkO6MkQuM,125
55
56
  worker-demo/worker_demo/worker.py,sha256=Rt-DjWa5iBP08k58NDZMfeyPuFbtNcbX6nc5jFX7GNo,440
56
- arkindex_base_worker-0.4.0rc5.dist-info/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
57
- arkindex_base_worker-0.4.0rc5.dist-info/METADATA,sha256=EFoMgnh4SUvYV0yIdBSfA7aoaZiTA7kYrJTrH2la3mY,3338
58
- arkindex_base_worker-0.4.0rc5.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
59
- arkindex_base_worker-0.4.0rc5.dist-info/top_level.txt,sha256=58NuslgxQC2vT4DiqZEgO4JqJRrYa2yeNI9QvkbfGQU,40
60
- arkindex_base_worker-0.4.0rc5.dist-info/RECORD,,
57
+ arkindex_base_worker-0.4.0rc6.dist-info/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
58
+ arkindex_base_worker-0.4.0rc6.dist-info/METADATA,sha256=gJd_0X7A26nuBe2EsIPHwap1XV2KnJBq2QwjBBB3Wi0,3339
59
+ arkindex_base_worker-0.4.0rc6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
60
+ arkindex_base_worker-0.4.0rc6.dist-info/top_level.txt,sha256=58NuslgxQC2vT4DiqZEgO4JqJRrYa2yeNI9QvkbfGQU,40
61
+ arkindex_base_worker-0.4.0rc6.dist-info/RECORD,,
@@ -27,7 +27,7 @@ from arkindex_worker.worker.element import ElementMixin
27
27
  from arkindex_worker.worker.entity import EntityMixin
28
28
  from arkindex_worker.worker.image import ImageMixin
29
29
  from arkindex_worker.worker.metadata import MetaDataMixin, MetaType # noqa: F401
30
- from arkindex_worker.worker.process import ActivityState, ProcessMode
30
+ from arkindex_worker.worker.process import ActivityState, ProcessMixin, ProcessMode
31
31
  from arkindex_worker.worker.task import TaskMixin
32
32
  from arkindex_worker.worker.transcription import TranscriptionMixin
33
33
  from arkindex_worker.worker.version import WorkerVersionMixin
@@ -44,6 +44,7 @@ class ElementsWorker(
44
44
  EntityMixin,
45
45
  MetaDataMixin,
46
46
  ImageMixin,
47
+ ProcessMixin,
47
48
  ):
48
49
  """
49
50
  Base class for ML workers that operate on Arkindex elements.
@@ -119,6 +120,9 @@ class ElementsWorker(
119
120
  return list(
120
121
  chain.from_iterable(map(self.list_set_elements, self.list_sets()))
121
122
  )
123
+ elif self.process_mode == ProcessMode.Export:
124
+ # For export mode processes, use list_process_elements and return element IDs
125
+ return {item["id"] for item in self.list_process_elements()}
122
126
 
123
127
  invalid_element_ids = list(filter(invalid_element_id, out))
124
128
  assert (
@@ -1,5 +1,11 @@
1
+ from collections.abc import Iterator
1
2
  from enum import Enum
2
3
 
4
+ from arkindex_worker.cache import unsupported_cache
5
+
6
+ # Increases the number of elements returned per page by the API
7
+ PROCESS_ELEMENTS_PAGE_SIZE = 500
8
+
3
9
 
4
10
  class ActivityState(Enum):
5
11
  """
@@ -66,3 +72,21 @@ class ProcessMode(Enum):
66
72
  """
67
73
  Export processes.
68
74
  """
75
+
76
+
77
+ class ProcessMixin:
78
+ @unsupported_cache
79
+ def list_process_elements(self, with_image: bool = False) -> Iterator[dict]:
80
+ """
81
+ List the elements of a process.
82
+
83
+ :param with_image: whether or not to include zone and image information in the elements response.
84
+ :returns: the process' elements.
85
+ """
86
+ return self.api_client.paginate(
87
+ "ListProcessElements",
88
+ id=self.process_information["id"],
89
+ with_image=with_image,
90
+ allow_missing_data=True,
91
+ page_size=PROCESS_ELEMENTS_PAGE_SIZE,
92
+ )
@@ -0,0 +1,89 @@
1
+ import pytest
2
+
3
+ from tests import PROCESS_ID
4
+
5
+
6
+ @pytest.mark.parametrize(
7
+ ("with_image", "elements"),
8
+ [
9
+ (
10
+ False,
11
+ [
12
+ {
13
+ "id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
14
+ "type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
15
+ "name": "element 1",
16
+ "confidence": 1,
17
+ "image_id": None,
18
+ "image_width": None,
19
+ "image_height": None,
20
+ "image_url": None,
21
+ "polygon": None,
22
+ "rotation_angle": 0,
23
+ "mirrored": False,
24
+ },
25
+ {
26
+ "id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0",
27
+ "type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
28
+ "name": "element 2",
29
+ "confidence": 1,
30
+ "image_id": None,
31
+ "image_width": None,
32
+ "image_height": None,
33
+ "image_url": None,
34
+ "polygon": None,
35
+ "rotation_angle": 0,
36
+ "mirrored": False,
37
+ },
38
+ ],
39
+ ),
40
+ (
41
+ True,
42
+ [
43
+ {
44
+ "id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
45
+ "type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
46
+ "name": "element 1",
47
+ "confidence": 1,
48
+ "image_id": "aaa2aaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
49
+ "image_width": 76,
50
+ "image_height": 138,
51
+ "image_url": "http://somewhere.com/iiif/image.jpeg",
52
+ "polygon": [[0, 0], [0, 40], [20, 40], [20, 0]],
53
+ "rotation_angle": 0,
54
+ "mirrored": False,
55
+ },
56
+ {
57
+ "id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0",
58
+ "type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
59
+ "name": "element 2",
60
+ "confidence": 1,
61
+ "image_id": "aaa2aaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
62
+ "image_width": 138,
63
+ "image_height": 76,
64
+ "image_url": "http://somewhere.com/iiif/image.jpeg",
65
+ "polygon": [[0, 0], [0, 40], [20, 40], [20, 0]],
66
+ "rotation_angle": 0,
67
+ "mirrored": False,
68
+ },
69
+ ],
70
+ ),
71
+ ],
72
+ )
73
+ def test_list_process_elements_with_image(
74
+ responses, mock_elements_worker, with_image, elements
75
+ ):
76
+ responses.add(
77
+ responses.GET,
78
+ f"http://testserver/api/v1/process/{PROCESS_ID}/elements/?page_size=500&with_count=true&with_image={with_image}",
79
+ status=200,
80
+ json={
81
+ "count": 2,
82
+ "next": None,
83
+ "results": elements,
84
+ },
85
+ )
86
+ assert (
87
+ list(mock_elements_worker.list_process_elements(with_image=with_image))
88
+ == elements
89
+ )
@@ -16,6 +16,7 @@ from arkindex_worker.models import Element
16
16
  from arkindex_worker.worker import ActivityState, ElementsWorker
17
17
  from arkindex_worker.worker.dataset import DatasetState
18
18
  from arkindex_worker.worker.process import ProcessMode
19
+ from tests import PROCESS_ID
19
20
 
20
21
  from . import BASE_API_CALLS
21
22
 
@@ -523,6 +524,51 @@ def test_get_elements_both_args_error(mocker, mock_elements_worker, tmp_path):
523
524
  worker.get_elements()
524
525
 
525
526
 
527
+ def test_get_elements_export_process(mock_elements_worker, responses):
528
+ responses.add(
529
+ responses.GET,
530
+ f"http://testserver/api/v1/process/{PROCESS_ID}/elements/?page_size=500&with_count=true&with_image=False",
531
+ status=200,
532
+ json={
533
+ "count": 2,
534
+ "next": None,
535
+ "results": [
536
+ {
537
+ "id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
538
+ "type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
539
+ "name": "element 1",
540
+ "confidence": 1,
541
+ "image_id": None,
542
+ "image_width": None,
543
+ "image_height": None,
544
+ "image_url": None,
545
+ "polygon": None,
546
+ "rotation_angle": 0,
547
+ "mirrored": False,
548
+ },
549
+ {
550
+ "id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0",
551
+ "type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
552
+ "name": "element 2",
553
+ "confidence": 1,
554
+ "image_id": None,
555
+ "image_width": None,
556
+ "image_height": None,
557
+ "image_url": None,
558
+ "polygon": None,
559
+ "rotation_angle": 0,
560
+ "mirrored": False,
561
+ },
562
+ ],
563
+ },
564
+ )
565
+ mock_elements_worker.process_information["mode"] = "export"
566
+ assert set(mock_elements_worker.get_elements()) == {
567
+ "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
568
+ "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0",
569
+ }
570
+
571
+
526
572
  @pytest.mark.usefixtures("_mock_worker_run_api")
527
573
  def test_activities_disabled(responses, monkeypatch):
528
574
  """Test worker process elements without updating activities when they are disabled for the process"""