arkindex-base-worker 0.3.7rc7__py3-none-any.whl → 0.3.7rc9__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package. It is provided for informational purposes only and reflects the packages exactly as they were published to their public registry.
- {arkindex_base_worker-0.3.7rc7.dist-info → arkindex_base_worker-0.3.7rc9.dist-info}/METADATA +5 -1
- {arkindex_base_worker-0.3.7rc7.dist-info → arkindex_base_worker-0.3.7rc9.dist-info}/RECORD +10 -9
- arkindex_worker/utils.py +13 -0
- arkindex_worker/worker/__init__.py +7 -5
- tests/test_dataset_worker.py +15 -12
- tests/test_utils.py +23 -1
- worker-demo/tests/__init__.py +0 -0
- {arkindex_base_worker-0.3.7rc7.dist-info → arkindex_base_worker-0.3.7rc9.dist-info}/LICENSE +0 -0
- {arkindex_base_worker-0.3.7rc7.dist-info → arkindex_base_worker-0.3.7rc9.dist-info}/WHEEL +0 -0
- {arkindex_base_worker-0.3.7rc7.dist-info → arkindex_base_worker-0.3.7rc9.dist-info}/top_level.txt +0 -0
{arkindex_base_worker-0.3.7rc7.dist-info → arkindex_base_worker-0.3.7rc9.dist-info}/METADATA
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: arkindex-base-worker
-Version: 0.3.7rc7
+Version: 0.3.7rc9
 Summary: Base Worker to easily build Arkindex ML workflows
 Author-email: Teklia <contact@teklia.com>
 Maintainer-email: Teklia <contact@teklia.com>
@@ -56,6 +56,10 @@ Requires-Dist: mkdocs-material ==9.5.10 ; extra == 'docs'
 Requires-Dist: mkdocstrings ==0.24.0 ; extra == 'docs'
 Requires-Dist: mkdocstrings-python ==1.8.0 ; extra == 'docs'
 Requires-Dist: recommonmark ==0.7.1 ; extra == 'docs'
+Provides-Extra: tests
+Requires-Dist: pytest ==8.0.1 ; extra == 'tests'
+Requires-Dist: pytest-mock ==3.12.0 ; extra == 'tests'
+Requires-Dist: pytest-responses ==0.5.1 ; extra == 'tests'
 
 # Arkindex base Worker
 
{arkindex_base_worker-0.3.7rc7.dist-info → arkindex_base_worker-0.3.7rc9.dist-info}/RECORD
RENAMED

@@ -2,8 +2,8 @@ arkindex_worker/__init__.py,sha256=OlgCtTC9MaWeejviY0a3iQpALcRQGMVArFVVYwTF6I8,1
 arkindex_worker/cache.py,sha256=FTlB0coXofn5zTNRTcVIvh709mcw4a1bPGqkwWjKs3w,11248
 arkindex_worker/image.py,sha256=5ymIGaTm2D7Sp2YYQkbuheuGnx5VJo0_AzYAEIvNGhs,14267
 arkindex_worker/models.py,sha256=xSvOadkNg3rgccic1xLgonzP28ugzmcGw0IUqXn51Cc,9844
-arkindex_worker/utils.py,sha256=
-arkindex_worker/worker/__init__.py,sha256=
+arkindex_worker/utils.py,sha256=0Mu7Fa8DVcHn19pg-FIXqMDpfgzQkb7QR9IAlAi-x_k,7243
+arkindex_worker/worker/__init__.py,sha256=U-_zOrQ09xmpBF9SmrTVj_UwnsCjFueV5G2hJAFEwv0,18806
 arkindex_worker/worker/base.py,sha256=qtkCGfpGn7SWsQZRJ5cpW0gQ4tV_cyR_AHbuHZr53z4,19585
 arkindex_worker/worker/classification.py,sha256=JVz-6YEeuavOy7zGfQi4nE_wpj9hwMUZDXTem-hXQY8,10328
 arkindex_worker/worker/dataset.py,sha256=roX2IMMNA-icteTtRADiFSZiZSRPClqS62ZPJm9s2JI,2923
@@ -19,11 +19,11 @@ tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/conftest.py,sha256=Oi5SJic4TNwDj8Pm0WHgg657yB7_JKxbLC0HYPI3RUc,22134
 tests/test_base_worker.py,sha256=Uq6_MpLW23gmKFXkU-SyDUaA_4dlViLBGG4e3gpBBz0,24512
 tests/test_cache.py,sha256=ii0gyr0DrG7ChEs7pmT8hMdSguAOAcCze4bRMiFQxuk,10640
-tests/test_dataset_worker.py,sha256=
+tests/test_dataset_worker.py,sha256=1joFRFmkL6XfPL9y1NYB_5QO-5FF56rwigAHrqtJMMA,23848
 tests/test_element.py,sha256=2G9M15TLxQRmvrWM9Kw2ucnElh4kSv_oF_5FYwwAxTY,13181
 tests/test_image.py,sha256=FZv8njLxh45sVgmY71UFHt0lv1cHr0cK4rrtPhQleX8,16262
 tests/test_merge.py,sha256=Q4zCbtZbe0wBfqE56gvAD06c6pDuhqnjKaioFqIgAQw,8331
-tests/test_utils.py,sha256=
+tests/test_utils.py,sha256=vpeHMeL7bJQonv5ZEbJmlJikqVKn5VWlVEbvmYFzDYA,1650
 tests/test_elements_worker/__init__.py,sha256=Fh4nkbbyJSMv_VtjQxnWrOqTnxXaaWI8S9WU0VrzCHs,179
 tests/test_elements_worker/test_classifications.py,sha256=vU6al1THtDSmERyVscMXaqiRPwTllcpRUHyeyBQ8M9U,26417
 tests/test_elements_worker/test_cli.py,sha256=BsFTswLti63WAZ2pf6ipiZKWJJyCQuSfuKnSlESuK8g,2878
@@ -35,12 +35,13 @@ tests/test_elements_worker/test_task.py,sha256=FCpxE9UpouKXgjGvWgNHEai_Hiy2d1Ymq
 tests/test_elements_worker/test_training.py,sha256=WeG-cDuJ-YhPgfKH47TtXBxyargtLuk7c8tsik2WnL8,8414
 tests/test_elements_worker/test_transcriptions.py,sha256=WVJG26sZyY66fu-Eka9A1_WWIeNI2scogjypzURnp8A,73468
 tests/test_elements_worker/test_worker.py,sha256=7-jGJVT3yMGpIyN96Uafz5eIUrO4ieNLgw0k1D8BhGc,17163
+worker-demo/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 worker-demo/tests/conftest.py,sha256=XzNMNeg6pmABUAH8jN6eZTlZSFGLYjS3-DTXjiRN6Yc,1002
 worker-demo/tests/test_worker.py,sha256=3DLd4NRK4bfyatG5P_PK4k9P9tJHx9XQq5_ryFEEFVg,304
 worker-demo/worker_demo/__init__.py,sha256=2BPomV8ZMNf3YXJgloatKeHQCE6QOkwmsHGkO6MkQuM,125
 worker-demo/worker_demo/worker.py,sha256=Rt-DjWa5iBP08k58NDZMfeyPuFbtNcbX6nc5jFX7GNo,440
-arkindex_base_worker-0.3.
-arkindex_base_worker-0.3.
-arkindex_base_worker-0.3.
-arkindex_base_worker-0.3.
-arkindex_base_worker-0.3.
+arkindex_base_worker-0.3.7rc9.dist-info/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
+arkindex_base_worker-0.3.7rc9.dist-info/METADATA,sha256=qQZcH4ER5oq9pqZ3HqWVpVnQHTZWm8uBlWhHSK7Zz6g,3565
+arkindex_base_worker-0.3.7rc9.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+arkindex_base_worker-0.3.7rc9.dist-info/top_level.txt,sha256=58NuslgxQC2vT4DiqZEgO4JqJRrYa2yeNI9QvkbfGQU,40
+arkindex_base_worker-0.3.7rc9.dist-info/RECORD,,
arkindex_worker/utils.py
CHANGED

@@ -10,6 +10,19 @@ import zstandard as zstd
 
 logger = logging.getLogger(__name__)
 
+MANUAL_SOURCE = "manual"
+
+
+def parse_source_id(value: str) -> bool | str | None:
+    """
+    Parse a UUID argument (Worker Version, Worker Run, ...) to use it directly in the API.
+    Arkindex API filters generally expect `False` to filter manual sources.
+    """
+    if value == MANUAL_SOURCE:
+        return False
+    return value or None
+
+
 CHUNK_SIZE = 1024
 """Chunk Size used for ZSTD compression"""
 
arkindex_worker/worker/__init__.py
CHANGED

@@ -351,7 +351,9 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
         """
         super().__init__(description, support_cache)
 
-        self.downloaded_artifact: Path | None = None
+        # Path to the dataset compressed archive (containing images and a SQLite database)
+        # Set as an instance variable as dataset workers might use it to easily extract its content
+        self.downloaded_dataset_artifact: Path | None = None
 
         self.parser.add_argument(
             "--set",
@@ -389,12 +391,12 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
 
     def cleanup_downloaded_artifact(self) -> None:
         """
-        Cleanup the downloaded artifact if any
+        Cleanup the downloaded dataset artifact if any
         """
-        if not self.downloaded_artifact:
+        if not self.downloaded_dataset_artifact:
             return
 
-        self.downloaded_artifact.unlink(missing_ok=True)
+        self.downloaded_dataset_artifact.unlink(missing_ok=True)
 
     def download_dataset_artifact(self, dataset: Dataset) -> None:
         """
@@ -420,7 +422,7 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
                 continue
 
             archive.write_bytes(self.download_artifact(task_id, artifact).read())
-            self.downloaded_artifact = archive
+            self.downloaded_dataset_artifact = archive
             return
 
         raise MissingDatasetArchive(
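The comment added to DatasetWorker.__init__ notes that dataset workers may use downloaded_dataset_artifact to extract the archive's content themselves. A minimal sketch of that pattern, assuming a process_dataset hook on the subclass (the override point is not shown in this diff) and a placeholder extraction directory:

from pathlib import Path

from arkindex_worker.utils import extract_tar_zst_archive
from arkindex_worker.worker import DatasetWorker


class DemoDatasetWorker(DatasetWorker):
    # Hypothetical hook name: the method a concrete dataset worker overrides
    # is not part of this diff, so `process_dataset` is an assumption here.
    def process_dataset(self, dataset):
        # Once download_dataset_artifact() has run, the path of the compressed
        # dataset archive is exposed on the instance (renamed in 0.3.7rc9 from
        # downloaded_artifact to downloaded_dataset_artifact).
        assert self.downloaded_dataset_artifact is not None

        # Unpack the images and SQLite database contained in the archive.
        destination = Path("extracted_dataset")  # placeholder location
        extract_tar_zst_archive(self.downloaded_dataset_artifact, destination)

        # ... read the extracted files here ...

The base class's cleanup_downloaded_artifact() takes care of unlinking the downloaded archive, so the subclass does not need to delete it itself.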
tests/test_dataset_worker.py
CHANGED

@@ -38,18 +38,18 @@ def test_check_dataset_set():
 
 
 def test_cleanup_downloaded_artifact_no_download(mock_dataset_worker):
-    assert not mock_dataset_worker.downloaded_artifact
+    assert not mock_dataset_worker.downloaded_dataset_artifact
     # Do nothing
     mock_dataset_worker.cleanup_downloaded_artifact()
 
 
 def test_cleanup_downloaded_artifact(mock_dataset_worker, tmp_archive):
-    mock_dataset_worker.downloaded_artifact = tmp_archive
+    mock_dataset_worker.downloaded_dataset_artifact = tmp_archive
 
-    assert mock_dataset_worker.downloaded_artifact.exists()
+    assert mock_dataset_worker.downloaded_dataset_artifact.exists()
     # Unlink the downloaded archive
     mock_dataset_worker.cleanup_downloaded_artifact()
-    assert not mock_dataset_worker.downloaded_artifact.exists()
+    assert not mock_dataset_worker.downloaded_dataset_artifact.exists()
 
     # Unlinking again does not raise an error even if the archive no longer exists
     mock_dataset_worker.cleanup_downloaded_artifact()
@@ -230,8 +230,8 @@ def test_download_dataset_artifact(
     )
 
     if downloaded_cache:
-        mock_dataset_worker.downloaded_artifact = tmp_archive
-        previous_artifact = mock_dataset_worker.downloaded_artifact
+        mock_dataset_worker.downloaded_dataset_artifact = tmp_archive
+        previous_artifact = mock_dataset_worker.downloaded_dataset_artifact
 
     mock_dataset_worker.download_dataset_artifact(default_dataset)
 
@@ -239,12 +239,15 @@ def test_download_dataset_artifact(
     if previous_artifact:
         assert not previous_artifact.exists()
 
-    assert mock_dataset_worker.downloaded_artifact == tmp_path / "dataset_id.tar.zst"
     assert (
-        mock_dataset_worker.downloaded_artifact.read_bytes()
+        mock_dataset_worker.downloaded_dataset_artifact
+        == tmp_path / "dataset_id.tar.zst"
+    )
+    assert (
+        mock_dataset_worker.downloaded_dataset_artifact.read_bytes()
         == archive_path.read_bytes()
     )
-    mock_dataset_worker.downloaded_artifact.unlink()
+    mock_dataset_worker.downloaded_dataset_artifact.unlink()
 
     assert len(responses.calls) == len(BASE_API_CALLS) + 2
     assert [
@@ -264,11 +267,11 @@ def test_download_dataset_artifact_already_exists(
     )
     already_downloaded = tmp_path / "dataset_id.tar.zst"
     already_downloaded.write_bytes(b"Some content")
-    mock_dataset_worker.downloaded_artifact = already_downloaded
+    mock_dataset_worker.downloaded_dataset_artifact = already_downloaded
 
     mock_dataset_worker.download_dataset_artifact(default_dataset)
 
-    assert mock_dataset_worker.downloaded_artifact == already_downloaded
+    assert mock_dataset_worker.downloaded_dataset_artifact == already_downloaded
     already_downloaded.unlink()
 
     assert len(responses.calls) == len(BASE_API_CALLS)
@@ -534,7 +537,7 @@ def test_run_download_dataset_artifact_api_error(
     ]
 
 
-def test_run_no_downloaded_artifact_error(
+def test_run_no_downloaded_dataset_artifact_error(
     mocker,
     tmp_path,
     responses,
tests/test_utils.py
CHANGED

@@ -1,11 +1,33 @@
 from pathlib import Path
 
-from arkindex_worker.utils import close_delete_file, extract_tar_zst_archive
+import pytest
+
+from arkindex_worker.utils import (
+    close_delete_file,
+    extract_tar_zst_archive,
+    parse_source_id,
+)
 
 FIXTURES = Path(__file__).absolute().parent / "data"
 ARCHIVE = FIXTURES / "archive.tar.zst"
 
 
+@pytest.mark.parametrize(
+    ("source_id", "expected"),
+    [
+        (None, None),
+        ("", None),
+        (
+            "cafecafe-cafe-cafe-cafe-cafecafecafe",
+            "cafecafe-cafe-cafe-cafe-cafecafecafe",
+        ),
+        ("manual", False),
+    ],
+)
+def test_parse_source_id(source_id, expected):
+    assert parse_source_id(source_id) == expected
+
+
 def test_extract_tar_zst_archive(tmp_path):
     destination = tmp_path / "destination"
     _, archive_path = extract_tar_zst_archive(ARCHIVE, destination)
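For readers unfamiliar with the other helpers imported here: extract_tar_zst_archive(archive, destination) decompresses a .tar.zst archive and unpacks it under destination, returning a pair whose second element is an archive path (the `_, archive_path` unpacking above). A minimal usage sketch with placeholder paths; treating the first element as a file descriptor and passing the pair to close_delete_file for cleanup are assumptions, as neither is spelled out in this diff:

from pathlib import Path

from arkindex_worker.utils import close_delete_file, extract_tar_zst_archive

# Placeholder paths, not shipped with the package
archive = Path("archive.tar.zst")
destination = Path("extracted")

# Decompress the Zstandard stream and unpack the inner tar archive into
# `destination`; the returned pair points at the intermediate decompressed archive.
archive_fd, decompressed_path = extract_tar_zst_archive(archive, destination)

# ... work with the files extracted under `destination` ...

# Assumed cleanup: close the descriptor and delete the temporary decompressed archive.
close_delete_file(archive_fd, decompressed_path)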
worker-demo/tests/__init__.py
ADDED

File without changes

{arkindex_base_worker-0.3.7rc7.dist-info → arkindex_base_worker-0.3.7rc9.dist-info}/LICENSE
RENAMED

File without changes

{arkindex_base_worker-0.3.7rc7.dist-info → arkindex_base_worker-0.3.7rc9.dist-info}/WHEEL
RENAMED

File without changes

{arkindex_base_worker-0.3.7rc7.dist-info → arkindex_base_worker-0.3.7rc9.dist-info}/top_level.txt
RENAMED

File without changes