arkindex-base-worker 0.4.0rc2__py3-none-any.whl → 0.4.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arkindex-base-worker
3
- Version: 0.4.0rc2
3
+ Version: 0.4.0rc3
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -45,12 +45,12 @@ Requires-Dist: peewee ~=3.17
45
45
  Requires-Dist: Pillow ==10.4.0
46
46
  Requires-Dist: python-gnupg ==0.5.2
47
47
  Requires-Dist: shapely ==2.0.5
48
- Requires-Dist: teklia-toolbox ==0.1.5
48
+ Requires-Dist: teklia-toolbox ==0.1.7b1
49
49
  Requires-Dist: zstandard ==0.22.0
50
50
  Provides-Extra: docs
51
51
  Requires-Dist: black ==24.4.2 ; extra == 'docs'
52
52
  Requires-Dist: mkdocs-material ==9.5.33 ; extra == 'docs'
53
- Requires-Dist: mkdocstrings-python ==1.10.8 ; extra == 'docs'
53
+ Requires-Dist: mkdocstrings-python ==1.11.1 ; extra == 'docs'
54
54
  Provides-Extra: tests
55
55
  Requires-Dist: pytest ==8.3.2 ; extra == 'tests'
56
56
  Requires-Dist: pytest-mock ==3.14.0 ; extra == 'tests'
@@ -3,18 +3,18 @@ arkindex_worker/cache.py,sha256=FTlB0coXofn5zTNRTcVIvh709mcw4a1bPGqkwWjKs3w,1124
3
3
  arkindex_worker/image.py,sha256=oEgVCrSHiGh3D5-UXfM6PvT17TttSxC0115irpvB3Dw,18581
4
4
  arkindex_worker/models.py,sha256=bPQzGZNs5a6z6DEcygsa8T33VOqPlMUbwKzHqlKzwbw,9923
5
5
  arkindex_worker/utils.py,sha256=q1EeLdC6ebYIH-C0LOAqw2cNpjCjVoP-Vbr-39mF4w0,9884
6
- arkindex_worker/worker/__init__.py,sha256=4kj94AbdACO-q7w0krmv2QkCWbP7vLLgZk9sowZUBNw,15880
7
- arkindex_worker/worker/base.py,sha256=hIorYsLy88xR4-dhIsaQHbWCz5FMoja-gfRPCJhT3eY,19843
8
- arkindex_worker/worker/classification.py,sha256=ECm1cnQPOj_9m-CoO0e182ElSySAUOoyddHrORbShhc,10951
9
- arkindex_worker/worker/corpus.py,sha256=s9bCxOszJMwRq1WWAmKjWq888mjDfbaJ18Wo7h-rNOw,1827
6
+ arkindex_worker/worker/__init__.py,sha256=tcqxrox9EpOjaN2EQgXumiABKpWHLsJiynPC2_sZuOQ,15880
7
+ arkindex_worker/worker/base.py,sha256=7Pmw-UQSxV-xkW8NO5cXsxJ8W8szzyppMaNjq_az81A,19844
8
+ arkindex_worker/worker/classification.py,sha256=zECSNzGCZFzoPoDVZN4kuGYRNLzMQLBaRt3q1jnBSaA,10952
9
+ arkindex_worker/worker/corpus.py,sha256=0TQFOwZ6Te-CZi6lgkZY1wzyJ5wO9LAmcVQtqHvZpPk,2291
10
10
  arkindex_worker/worker/dataset.py,sha256=LwzKwNFX4FqfLxh29LSvJydPwRw3VHaB1wjuFhUshsE,5267
11
11
  arkindex_worker/worker/element.py,sha256=Qvvq9kJnAHNATHW7zi96eIY1x-0MsR-T5rrSJg6e9Y4,45309
12
- arkindex_worker/worker/entity.py,sha256=qGjQvOVXfP84rER0Dkui6q-rb9nTWerHVG0Z5voB8pU,15229
12
+ arkindex_worker/worker/entity.py,sha256=ThhP22xOYR5Z4P1VH_pOl_y_uDKZFeQVDqxO6aRkIhg,15227
13
13
  arkindex_worker/worker/image.py,sha256=t_Az6IGnj0EZyvcA4XxfPikOUjn_pztgsyxTkFZhaXU,621
14
14
  arkindex_worker/worker/metadata.py,sha256=VRajtd2kaBvar9GercX4knvR6l1WFYjoCdJWU9ccKgk,7291
15
15
  arkindex_worker/worker/process.py,sha256=I1rBt3Y8bV4zcPr8N1E2NRZ0UClSTqhExsO9CPcP41E,1012
16
- arkindex_worker/worker/task.py,sha256=1O9zrWXxe3na3TOcoHX5Pxn1875v7EU08BSsCPnb62g,1519
17
- arkindex_worker/worker/training.py,sha256=qnBFEk11JOWWPLTbjF-lZ9iFBdTPpQzZAzQ9a03J1j4,10874
16
+ arkindex_worker/worker/task.py,sha256=r1j7_qbdNu2Z8H8HbGzO3P3qdx-2N1pBbUPFDca0rqg,1519
17
+ arkindex_worker/worker/training.py,sha256=H8FmCdzGcDW-WMMwcgvmZPlN5tPHwGo0BXn12qmzj8g,10875
18
18
  arkindex_worker/worker/transcription.py,sha256=52RY9kYsiR1sz9FxOigyo12Ker3VDbQ4U42gK9DpR3g,21146
19
19
  arkindex_worker/worker/version.py,sha256=JIT7OI3Mo7RPkNrjOB9hfqrsG-FYygz_zi4l8PbkuAo,1960
20
20
  hooks/pre_gen_project.py,sha256=xQJERv3vv9VzIqcBHI281eeWLWREXUF4mMw7PvJHHXM,269
@@ -22,31 +22,31 @@ tests/__init__.py,sha256=DG--S6IpGl399rzSAjDdHL76CkOIeZIjajCcyUSDhOQ,241
22
22
  tests/conftest.py,sha256=2ocZ2x-mZQrNe9zvWwhWk2_4ExdaBHIB74SvtDlExRE,21580
23
23
  tests/test_base_worker.py,sha256=2EIYcd_3f9O0zB5WiGIQV0Cn9wndLvnEnSfcAE1qWWU,30607
24
24
  tests/test_cache.py,sha256=ii0gyr0DrG7ChEs7pmT8hMdSguAOAcCze4bRMiFQxuk,10640
25
- tests/test_dataset_worker.py,sha256=r5SWdXSWKXGBFAgH6ouF2Nk-k6lLJ6SgXjKYD2Vu_Es,22088
25
+ tests/test_dataset_worker.py,sha256=gApYz0LArHr1cNn079_fa_BQABF6RVQYuM1Tc4m3NsQ,22089
26
26
  tests/test_element.py,sha256=2G9M15TLxQRmvrWM9Kw2ucnElh4kSv_oF_5FYwwAxTY,13181
27
27
  tests/test_image.py,sha256=J3jqB5OhcdCpB6n0UnwivxrMlne8YjFLXhq1gBMANrs,26711
28
28
  tests/test_merge.py,sha256=TuOeUS0UCz66DPOQFFhc4NQBxIjZL9f5czi4XnvGrr4,8270
29
29
  tests/test_utils.py,sha256=_WJUPnt-pM_TQ0er4yjPZy-u_LePrHq1lxwk_teky7M,2544
30
30
  tests/test_elements_worker/__init__.py,sha256=Fh4nkbbyJSMv_VtjQxnWrOqTnxXaaWI8S9WU0VrzCHs,179
31
- tests/test_elements_worker/test_classifications.py,sha256=fXZ8cSzIWwZ6LHsY7tKsy9-Pp9fKyKUStIXS4ViBcek,27779
31
+ tests/test_elements_worker/test_classifications.py,sha256=GtVyi9bg4YTd7nyw8u6IjQZYBwFMwoVZdrfSBc5UybU,27780
32
32
  tests/test_elements_worker/test_cli.py,sha256=a23i1pUDbXi23MUtbWwGEcLLrmc_YlrbDgOG3h66wLM,2620
33
- tests/test_elements_worker/test_corpus.py,sha256=c_LUHvkJIYgk_wXF06VQPNOoWfiZ06XpjOXrJ7MRiBc,4479
34
- tests/test_elements_worker/test_dataset.py,sha256=lSXqubhg1EEq2Y2goE8Y2RYaqIpM9Iejq6fGNW2BczU,11411
35
- tests/test_elements_worker/test_elements.py,sha256=JZmqddzxuEFhGNnYj8iE4kA7mODYbMGNf7STnA8CVL4,115496
36
- tests/test_elements_worker/test_entities.py,sha256=oav2dtvWWavQe1l3Drbxw1Ta2ocUJEVxJfDQ_r6-rYQ,36181
37
- tests/test_elements_worker/test_image.py,sha256=_E3UGdDOwTo1MW5KMS81PrdeSPBPWinWYoQPNy2F9Ro,2077
38
- tests/test_elements_worker/test_metadata.py,sha256=cm2NNaXxBYmYMkPexSPVTAqb2skDTB4mliwQCLz8Y98,22293
39
- tests/test_elements_worker/test_task.py,sha256=7Sr3fbjdgWUXJUhJEiC9CwnbhQIQX3rCInmHMIrmA38,5573
33
+ tests/test_elements_worker/test_corpus.py,sha256=OAbwgaQtHmcmPkcAl9Kuceun_BvMasnZvYj4_EdfugY,5483
34
+ tests/test_elements_worker/test_dataset.py,sha256=00IlOZv9YFlZ23rGXyR-HLbKLQxGelZ1Bf9lEZYA0IY,11412
35
+ tests/test_elements_worker/test_elements.py,sha256=l5YTfm0CzBTQyZvdOplhhza-gpPSz-8RVix1YUzAwhM,115497
36
+ tests/test_elements_worker/test_entities.py,sha256=nrCvkdJdjsyOrbD6R-H8NvxREZxciiR6CGIObXzeg50,36182
37
+ tests/test_elements_worker/test_image.py,sha256=BljMNKgec_9a5bzNzFpYZIvSbuvwsWDfdqLHVJaTa7M,2079
38
+ tests/test_elements_worker/test_metadata.py,sha256=Xfggy-vxw5DZ3hFKx3sB7OYb2d1tu1RiNK8fvKJIaBs,22294
39
+ tests/test_elements_worker/test_task.py,sha256=wTUWqN9UhfKmJn3IcFY75EW4I1ulRhisflmY1kmP47s,5574
40
40
  tests/test_elements_worker/test_training.py,sha256=Qxi9EzGr_uKcn2Fh5aE6jNrq1K8QKLiOiSew4upASPs,8721
41
- tests/test_elements_worker/test_transcriptions.py,sha256=FNY6E26iTKqe7LP9LO72By4oV4g9hBIZYTU9BAc_w7I,77060
42
- tests/test_elements_worker/test_worker.py,sha256=AuFDyqncIusT-rMMY4sEay9MqGvoNuSuZQq-5rHN02U,10803
41
+ tests/test_elements_worker/test_transcriptions.py,sha256=iq-nR_st7Q9E_nD7knrKGY57g36J6nYSEzbPk9y-cxY,77061
42
+ tests/test_elements_worker/test_worker.py,sha256=VdprIWezB3dJdE8vNOrS71RQugqUysHlveOWTQate-8,10804
43
43
  worker-demo/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  worker-demo/tests/conftest.py,sha256=XzNMNeg6pmABUAH8jN6eZTlZSFGLYjS3-DTXjiRN6Yc,1002
45
45
  worker-demo/tests/test_worker.py,sha256=3DLd4NRK4bfyatG5P_PK4k9P9tJHx9XQq5_ryFEEFVg,304
46
46
  worker-demo/worker_demo/__init__.py,sha256=2BPomV8ZMNf3YXJgloatKeHQCE6QOkwmsHGkO6MkQuM,125
47
47
  worker-demo/worker_demo/worker.py,sha256=Rt-DjWa5iBP08k58NDZMfeyPuFbtNcbX6nc5jFX7GNo,440
48
- arkindex_base_worker-0.4.0rc2.dist-info/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
49
- arkindex_base_worker-0.4.0rc2.dist-info/METADATA,sha256=Akdq43GSmQL2Sr7VpaBGMjKlQtb1z8lIEUUBqoXoWg8,3304
50
- arkindex_base_worker-0.4.0rc2.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
51
- arkindex_base_worker-0.4.0rc2.dist-info/top_level.txt,sha256=58NuslgxQC2vT4DiqZEgO4JqJRrYa2yeNI9QvkbfGQU,40
52
- arkindex_base_worker-0.4.0rc2.dist-info/RECORD,,
48
+ arkindex_base_worker-0.4.0rc3.dist-info/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
49
+ arkindex_base_worker-0.4.0rc3.dist-info/METADATA,sha256=eDT7HxTvEz2yg4U_lbzkuigNWFu4JTqaLTnY0fqSCiM,3306
50
+ arkindex_base_worker-0.4.0rc3.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
51
+ arkindex_base_worker-0.4.0rc3.dist-info/top_level.txt,sha256=58NuslgxQC2vT4DiqZEgO4JqJRrYa2yeNI9QvkbfGQU,40
52
+ arkindex_base_worker-0.4.0rc3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (74.0.0)
2
+ Generator: setuptools (75.3.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -10,8 +10,7 @@ from collections.abc import Iterable
10
10
  from itertools import chain
11
11
  from pathlib import Path
12
12
 
13
- from apistar.exceptions import ErrorResponse
14
-
13
+ from arkindex.exceptions import ErrorResponse
15
14
  from arkindex_worker import logger
16
15
  from arkindex_worker.cache import CachedElement
17
16
  from arkindex_worker.models import Dataset, Element, Set
@@ -12,9 +12,9 @@ from tempfile import mkdtemp
12
12
 
13
13
  import gnupg
14
14
  import yaml
15
- from apistar.exceptions import ErrorResponse
16
15
 
17
16
  from arkindex import options_from_env
17
+ from arkindex.exceptions import ErrorResponse
18
18
  from arkindex_worker import logger
19
19
  from arkindex_worker.cache import (
20
20
  check_version,
@@ -2,9 +2,9 @@
2
2
  ElementsWorker methods for classifications and ML classes.
3
3
  """
4
4
 
5
- from apistar.exceptions import ErrorResponse
6
5
  from peewee import IntegrityError
7
6
 
7
+ from arkindex.exceptions import ErrorResponse
8
8
  from arkindex_worker import logger
9
9
  from arkindex_worker.cache import CachedClassification, CachedElement
10
10
  from arkindex_worker.models import Element
@@ -5,6 +5,7 @@ BaseWorker methods for corpora.
5
5
  from enum import Enum
6
6
  from operator import itemgetter
7
7
  from tempfile import _TemporaryFileWrapper
8
+ from uuid import UUID
8
9
 
9
10
  from arkindex_worker import logger
10
11
 
@@ -36,6 +37,25 @@ class CorpusExportState(Enum):
36
37
 
37
38
 
38
39
  class CorpusMixin:
40
+ def download_export(self, export_id: str) -> _TemporaryFileWrapper:
41
+ """
42
+ Download an export.
43
+
44
+ :param export_id: UUID of the export to download
45
+ :returns: The downloaded export stored in a temporary file.
46
+ """
47
+ try:
48
+ UUID(export_id)
49
+ except ValueError as e:
50
+ raise ValueError("export_id is not a valid uuid.") from e
51
+
52
+ logger.info(f"Downloading export ({export_id})...")
53
+ export: _TemporaryFileWrapper = self.api_client.request(
54
+ "DownloadExport", id=export_id
55
+ )
56
+ logger.info(f"Downloaded export ({export_id}) @ `{export.name}`")
57
+ return export
58
+
39
59
  def download_latest_export(self) -> _TemporaryFileWrapper:
40
60
  """
41
61
  Download the latest export in `done` state of the current corpus.
@@ -62,10 +82,5 @@ class CorpusMixin:
62
82
 
63
83
  # Download latest export
64
84
  export_id: str = exports[0]["id"]
65
- logger.info(f"Downloading export ({export_id})...")
66
- export: _TemporaryFileWrapper = self.api_client.request(
67
- "DownloadExport", id=export_id
68
- )
69
- logger.info(f"Downloaded export ({export_id}) @ `{export.name}`")
70
85
 
71
- return export
86
+ return self.download_export(export_id)
@@ -302,7 +302,7 @@ class EntityMixin:
302
302
 
303
303
  created_entities = [
304
304
  created_entity
305
- for batch in make_batches(entities, "entities", batch_size)
305
+ for batch in make_batches(entities, "entity", batch_size)
306
306
  for created_entity in self.api_client.request(
307
307
  "CreateTranscriptionEntities",
308
308
  id=transcription.id,
@@ -5,8 +5,7 @@ BaseWorker methods for tasks.
5
5
  import uuid
6
6
  from collections.abc import Iterator
7
7
 
8
- from apistar.compat import DownloadedFile
9
-
8
+ from arkindex.compat import DownloadedFile
10
9
  from arkindex_worker.models import Artifact
11
10
 
12
11
 
@@ -9,8 +9,8 @@ from typing import NewType
9
9
  from uuid import UUID
10
10
 
11
11
  import requests
12
- from apistar.exceptions import ErrorResponse
13
12
 
13
+ from arkindex.exceptions import ErrorResponse
14
14
  from arkindex_worker import logger
15
15
  from arkindex_worker.utils import close_delete_file, create_tar_zst_archive
16
16
 
@@ -3,8 +3,8 @@ import uuid
3
3
  from argparse import ArgumentTypeError
4
4
 
5
5
  import pytest
6
- from apistar.exceptions import ErrorResponse
7
6
 
7
+ from arkindex.exceptions import ErrorResponse
8
8
  from arkindex_worker.models import Dataset, Set
9
9
  from arkindex_worker.worker.dataset import (
10
10
  DatasetState,
@@ -3,8 +3,8 @@ import re
3
3
  from uuid import UUID
4
4
 
5
5
  import pytest
6
- from apistar.exceptions import ErrorResponse
7
6
 
7
+ from arkindex.exceptions import ErrorResponse
8
8
  from arkindex_worker.cache import CachedClassification, CachedElement
9
9
  from arkindex_worker.models import Element
10
10
  from arkindex_worker.utils import DEFAULT_BATCH_SIZE
@@ -2,8 +2,8 @@ import re
2
2
  import uuid
3
3
 
4
4
  import pytest
5
- from apistar.exceptions import ErrorResponse
6
5
 
6
+ from arkindex.exceptions import ErrorResponse
7
7
  from arkindex_worker.worker.corpus import CorpusExportState
8
8
  from tests import CORPUS_ID
9
9
  from tests.test_elements_worker import BASE_API_CALLS
@@ -135,3 +135,34 @@ def test_download_latest_export(responses, mock_elements_worker):
135
135
  ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
136
136
  ("GET", f"http://testserver/api/v1/export/{export_id}/"),
137
137
  ]
138
+
139
+
140
+ def test_download_export_not_a_uuid(responses, mock_elements_worker):
141
+ with pytest.raises(ValueError, match="export_id is not a valid uuid."):
142
+ mock_elements_worker.download_export("mon export")
143
+
144
+
145
+ def test_download_export(responses, mock_elements_worker):
146
+ responses.add(
147
+ responses.GET,
148
+ "http://testserver/api/v1/export/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff/",
149
+ status=302,
150
+ body=b"some SQLite export",
151
+ content_type="application/x-sqlite3",
152
+ stream=True,
153
+ )
154
+
155
+ export = mock_elements_worker.download_export(
156
+ "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"
157
+ )
158
+ assert export.name == "/tmp/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"
159
+
160
+ assert len(responses.calls) == len(BASE_API_CALLS) + 1
161
+ assert [
162
+ (call.request.method, call.request.url) for call in responses.calls
163
+ ] == BASE_API_CALLS + [
164
+ (
165
+ "GET",
166
+ "http://testserver/api/v1/export/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff/",
167
+ ),
168
+ ]
@@ -2,8 +2,8 @@ import json
2
2
  import logging
3
3
 
4
4
  import pytest
5
- from apistar.exceptions import ErrorResponse
6
5
 
6
+ from arkindex.exceptions import ErrorResponse
7
7
  from arkindex_worker.models import Dataset, Element, Set
8
8
  from arkindex_worker.worker.dataset import DatasetState
9
9
  from tests import PROCESS_ID
@@ -4,9 +4,9 @@ from argparse import Namespace
4
4
  from uuid import UUID
5
5
 
6
6
  import pytest
7
- from apistar.exceptions import ErrorResponse
8
7
  from responses import matchers
9
8
 
9
+ from arkindex.exceptions import ErrorResponse
10
10
  from arkindex_worker.cache import (
11
11
  SQL_VERSION,
12
12
  CachedElement,
@@ -3,9 +3,9 @@ import re
3
3
  from uuid import UUID
4
4
 
5
5
  import pytest
6
- from apistar.exceptions import ErrorResponse
7
6
  from responses import matchers
8
7
 
8
+ from arkindex.exceptions import ErrorResponse
9
9
  from arkindex_worker.cache import (
10
10
  CachedElement,
11
11
  CachedEntity,
@@ -1,7 +1,8 @@
1
1
  import json
2
2
 
3
3
  import pytest
4
- from apistar.exceptions import ErrorResponse
4
+
5
+ from arkindex.exceptions import ErrorResponse
5
6
 
6
7
  from . import BASE_API_CALLS
7
8
 
@@ -2,8 +2,8 @@ import json
2
2
  import re
3
3
 
4
4
  import pytest
5
- from apistar.exceptions import ErrorResponse
6
5
 
6
+ from arkindex.exceptions import ErrorResponse
7
7
  from arkindex.mock import MockApiClient
8
8
  from arkindex_worker.cache import CachedElement
9
9
  from arkindex_worker.models import Element
@@ -1,8 +1,8 @@
1
1
  import uuid
2
2
 
3
3
  import pytest
4
- from apistar.exceptions import ErrorResponse
5
4
 
5
+ from arkindex.exceptions import ErrorResponse
6
6
  from arkindex_worker.models import Artifact
7
7
  from tests import FIXTURES_DIR
8
8
  from tests.test_elements_worker import BASE_API_CALLS
@@ -3,9 +3,9 @@ import re
3
3
  from uuid import UUID
4
4
 
5
5
  import pytest
6
- from apistar.exceptions import ErrorResponse
7
6
  from playhouse.shortcuts import model_to_dict
8
7
 
8
+ from arkindex.exceptions import ErrorResponse
9
9
  from arkindex_worker.cache import CachedElement, CachedTranscription
10
10
  from arkindex_worker.models import Element
11
11
  from arkindex_worker.utils import DEFAULT_BATCH_SIZE
@@ -2,8 +2,8 @@ import json
2
2
  import sys
3
3
 
4
4
  import pytest
5
- from apistar.exceptions import ErrorResponse
6
5
 
6
+ from arkindex.exceptions import ErrorResponse
7
7
  from arkindex_worker.cache import CachedElement
8
8
  from arkindex_worker.worker import ActivityState, ElementsWorker
9
9