arkindex-base-worker 0.5.2a1__py3-none-any.whl → 0.5.2a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.5.2a1.dist-info → arkindex_base_worker-0.5.2a2.dist-info}/METADATA +2 -1
- {arkindex_base_worker-0.5.2a1.dist-info → arkindex_base_worker-0.5.2a2.dist-info}/RECORD +10 -10
- {arkindex_base_worker-0.5.2a1.dist-info → arkindex_base_worker-0.5.2a2.dist-info}/WHEEL +1 -1
- arkindex_worker/image.py +2 -2
- arkindex_worker/worker/__init__.py +8 -7
- arkindex_worker/worker/task.py +53 -0
- tests/test_dataset_worker.py +50 -63
- tests/test_elements_worker/test_task.py +112 -0
- {arkindex_base_worker-0.5.2a1.dist-info → arkindex_base_worker-0.5.2a2.dist-info}/licenses/LICENSE +0 -0
- {arkindex_base_worker-0.5.2a1.dist-info → arkindex_base_worker-0.5.2a2.dist-info}/top_level.txt +0 -0
{arkindex_base_worker-0.5.2a1.dist-info → arkindex_base_worker-0.5.2a2.dist-info}/METADATA
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: arkindex-base-worker
-Version: 0.5.2a1
+Version: 0.5.2a2
 Summary: Base Worker to easily build Arkindex ML workflows
 Author-email: Teklia <contact@teklia.com>
 Maintainer-email: Teklia <contact@teklia.com>

@@ -23,6 +23,7 @@ Requires-Dist: humanize==4.15.0
 Requires-Dist: peewee~=3.17
 Requires-Dist: Pillow==11.3.0
 Requires-Dist: python-gnupg==0.5.6
+Requires-Dist: python-magic==0.4.27
 Requires-Dist: shapely==2.0.6
 Requires-Dist: teklia-toolbox==0.1.12
 Requires-Dist: zstandard==0.25.0
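
The only dependency change is the new python-magic requirement, which the task.py changes below use to detect an artifact's MIME type from its content. A minimal sketch of that call, assuming libmagic is installed on the system and using an illustrative file name:

    import magic

    # Inspect the file *content* (not the extension); mime=True returns a
    # MIME string such as "image/png" instead of a human-readable description.
    content_type = magic.from_file("page_scan.png", mime=True)
    print(content_type)  # e.g. "image/png"
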
{arkindex_base_worker-0.5.2a1.dist-info → arkindex_base_worker-0.5.2a2.dist-info}/RECORD
RENAMED

@@ -1,10 +1,10 @@
-arkindex_base_worker-0.5.2a1.dist-info/licenses/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
+arkindex_base_worker-0.5.2a2.dist-info/licenses/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
 arkindex_worker/__init__.py,sha256=Sdt5KXn8EgURb2MurYVrUWaHbH3iFA1XLRo0Lc5AJ44,250
 arkindex_worker/cache.py,sha256=XpEXMSnbhYCvrJquwA9XXqZo-ajMLpaCxKG5wH3Gp6Y,10959
-arkindex_worker/image.py,sha256=…
+arkindex_worker/image.py,sha256=9KeZHWNIDkwNJZR0y-mbyD_pvKfrgdktMB32jZqSMYk,20927
 arkindex_worker/models.py,sha256=DgKvAB_2e1cPcuUavZkyTkV10jBK8y083oVklB9idSk,10855
 arkindex_worker/utils.py,sha256=Eqg5pGAuOmuwMT3EhKTQDMek7wHC1KzZL7XXqYVVfHY,10977
-arkindex_worker/worker/__init__.py,sha256=…
+arkindex_worker/worker/__init__.py,sha256=tM_ynAARmtuJw5YWb_jI0AD5KNXbWN1K-VDiixIp7O4,18009
 arkindex_worker/worker/base.py,sha256=-R_aLMJHbR6X1uM-U0zExsF_KLy5Wl3WJ_YMGO9We0I,22153
 arkindex_worker/worker/classification.py,sha256=qvykymkgd4nGywHCxL8obo4egstoGsmWNS4Ztc1qNWQ,11024
 arkindex_worker/worker/corpus.py,sha256=MeIMod7jkWyX0frtD0a37rhumnMV3p9ZOC1xwAoXrAA,2291

@@ -14,7 +14,7 @@ arkindex_worker/worker/entity.py,sha256=Aj6EOfzHEm7qQV-Egm0YKLZgCrLS_3ggOKTY81M2…
 arkindex_worker/worker/image.py,sha256=L6Ikuf0Z0RxJk7JarY5PggJGrYSHLaPK0vn0dy0CIaQ,623
 arkindex_worker/worker/metadata.py,sha256=keZdOdUthSH2hAw9iet5pN7rzWihTUYjZHRGTEjaltw,6843
 arkindex_worker/worker/process.py,sha256=9TEHpMcBax1wc6PrWMMrdXe2uNfqyVj7n_dAYZRBGnY,1854
-arkindex_worker/worker/task.py,sha256=…
+arkindex_worker/worker/task.py,sha256=HASQU5LYVtgvCnRCLFC6iH7h7v6q_usZNZ-r_Wkv9A8,3306
 arkindex_worker/worker/training.py,sha256=tyQOHcwv--_wdYz6CgLEe1YM7kwwwKN30LvGTsnWd78,10923
 arkindex_worker/worker/transcription.py,sha256=sw718R119tsLNY8inPMVeIilvFJo94fMbMtYgH0zTM8,21250
 examples/standalone/python/worker.py,sha256=Zr4s4pHvgexEjlkixLFYZp1UuwMLeoTxjyNG5_S2iYE,6672

@@ -24,7 +24,7 @@ tests/__init__.py,sha256=DG--S6IpGl399rzSAjDdHL76CkOIeZIjajCcyUSDhOQ,241
 tests/conftest.py,sha256=Tp7YFK17NATwF2yAcBwi0QFNyKSXtLS0VhZ-zZngsQI,24343
 tests/test_base_worker.py,sha256=lwS4X3atS2ktEKd1XdogmN3mbzq-tO206-k_0EDITlw,29302
 tests/test_cache.py,sha256=_wztzh94EwVrb8UvpFqgl2aa2_FLaCcJKaqunCYR5Dw,10435
-tests/test_dataset_worker.py,sha256=…
+tests/test_dataset_worker.py,sha256=LmL3ERF1__PUPkTLiAFC0IYglZTv5WQYA42Vm-uhe2w,22023
 tests/test_element.py,sha256=hlj5VSF4plwC7uz9R4LGOOXZJQcHZiYCIDZT5V6EIB8,14334
 tests/test_image.py,sha256=yAM5mMfpQcIurT1KLHmu0AhSX2Qm3YvCu7afyZ3XUdU,28314
 tests/test_merge.py,sha256=REpZ13jkq_qm_4L5URQgFy5lxvPZtXxQEiWfYLMdmF0,7956

@@ -44,7 +44,7 @@ tests/test_elements_worker/test_entity.py,sha256=SNAZEsVVLnqlliOmjkgv_cZhw0bAuJU…
 tests/test_elements_worker/test_image.py,sha256=BljMNKgec_9a5bzNzFpYZIvSbuvwsWDfdqLHVJaTa7M,2079
 tests/test_elements_worker/test_metadata.py,sha256=qtTDtlp3VnBkfck7PAguK2dEgTLlr1i1EVnmNTeNf3A,20515
 tests/test_elements_worker/test_process.py,sha256=y4RoVhPfyHzR795fw7-_FXElBcKo3fy4Ew_HI-kxJic,3088
-tests/test_elements_worker/test_task.py,sha256=…
+tests/test_elements_worker/test_task.py,sha256=oHwP1fbJftXFA2U4qA3Gb4vX-iJoV-sBvPHnfBBpRrc,8906
 tests/test_elements_worker/test_training.py,sha256=qgK7BLucddRzc8ePbQtY75x17QvGDEq5XCwgyyvmAJE,8717
 tests/test_elements_worker/test_transcription_create.py,sha256=yznO9B_BVsOR0Z_VY5ZL8gJp0ZPCz_4sPUs5dXtixAg,29281
 tests/test_elements_worker/test_transcription_create_with_elements.py,sha256=tmcyglgssEqMnt1Mdy_u6X1m2wgLWTo_HdWst3GrK2k,33056

@@ -55,7 +55,7 @@ worker-demo/tests/conftest.py,sha256=XzNMNeg6pmABUAH8jN6eZTlZSFGLYjS3-DTXjiRN6Yc…
 worker-demo/tests/test_worker.py,sha256=3DLd4NRK4bfyatG5P_PK4k9P9tJHx9XQq5_ryFEEFVg,304
 worker-demo/worker_demo/__init__.py,sha256=2BPomV8ZMNf3YXJgloatKeHQCE6QOkwmsHGkO6MkQuM,125
 worker-demo/worker_demo/worker.py,sha256=Rt-DjWa5iBP08k58NDZMfeyPuFbtNcbX6nc5jFX7GNo,440
-arkindex_base_worker-0.5.2a1.dist-info/METADATA,sha256=…
-arkindex_base_worker-0.5.2a1.dist-info/WHEEL,sha256=…
-arkindex_base_worker-0.5.2a1.dist-info/top_level.txt,sha256=-vNjP2VfROx0j83mdi9aIqRZ88eoJjxeWz-R_gPgyXU,49
-arkindex_base_worker-0.5.2a1.dist-info/RECORD,,
+arkindex_base_worker-0.5.2a2.dist-info/METADATA,sha256=LyPpeyvKIadAuqir1cymTwxoWm3XovhF-JmzQ1LW0MI,1885
+arkindex_base_worker-0.5.2a2.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
+arkindex_base_worker-0.5.2a2.dist-info/top_level.txt,sha256=-vNjP2VfROx0j83mdi9aIqRZ88eoJjxeWz-R_gPgyXU,49
+arkindex_base_worker-0.5.2a2.dist-info/RECORD,,
arkindex_worker/image.py
CHANGED

@@ -38,7 +38,7 @@ if TYPE_CHECKING:
     from arkindex_worker.models import Element
 
 # See http://docs.python-requests.org/en/master/user/advanced/#timeouts
-…
+REQUEST_TIMEOUT = (30, 60)
 
 BoundingBox = namedtuple("BoundingBox", ["x", "y", "width", "height"])
 

@@ -346,7 +346,7 @@ def _retried_request(url, *args, method=requests.get, **kwargs):
         url,
         *args,
         headers={"User-Agent": IIIF_USER_AGENT},
-        timeout=…,
+        timeout=REQUEST_TIMEOUT,
         verify=should_verify_cert(url),
         **kwargs,
     )
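
requests interprets a 2-tuple timeout as separate connect and read timeouts (the convention documented at the URL in the comment above), so the renamed REQUEST_TIMEOUT constant allows 30 seconds to establish the connection and 60 seconds between bytes of the response. A standalone sketch with an illustrative URL:

    import requests

    REQUEST_TIMEOUT = (30, 60)  # (connect timeout, read timeout), in seconds

    # Raises requests.ConnectTimeout if the TCP handshake exceeds 30s, and
    # requests.ReadTimeout if the server stalls longer than 60s mid-response.
    response = requests.get("https://iiif.example.com/image.jpg", timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
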
arkindex_worker/worker/__init__.py
CHANGED

@@ -424,12 +424,13 @@ class DatasetWorker(DatasetMixin, BaseWorker, TaskMixin):
         failed = 0
         for i, dataset_set in enumerate(dataset_sets, start=1):
             try:
-                assert dataset_set.dataset.state == DatasetState.Complete.value, (
-                    "When processing a set, its dataset state should be Complete."
-                )
-
-                logger.info(f"Retrieving data for {dataset_set} ({i}/{count})")
-                self.download_dataset_artifact(dataset_set.dataset)
+                if dataset_set.dataset.state == DatasetState.Complete.value:
+                    logger.info(f"Retrieving data for {dataset_set} ({i}/{count})")
+                    self.download_dataset_artifact(dataset_set.dataset)
+                else:
+                    logger.warning(
+                        f"The dataset {dataset_set.dataset} has its state set to `{dataset_set.dataset.state}`, its archive will not be downloaded"
+                    )
 
                 logger.info(f"Processing {dataset_set} ({i}/{count})")
                 self.process_set(dataset_set)

@@ -444,7 +445,7 @@ class DatasetWorker(DatasetMixin, BaseWorker, TaskMixin):
 
             logger.warning(message, exc_info=e if self.args.verbose else None)
 
-        # Cleanup the latest downloaded dataset artifact
+        # Cleanup the latest downloaded dataset artifact (if needed)
         self.cleanup_downloaded_artifact()
 
         message = f"Ran on {count} {pluralize('set', count)}: {count - failed} completed, {failed} failed"
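
The state check goes through .value because the API serializes a dataset's state as a plain string, and an Enum member never compares equal to its raw value. A standalone sketch, with member names assumed for illustration:

    from enum import Enum

    class DatasetState(Enum):
        Open = "open"
        Building = "building"
        Complete = "complete"
        Error = "error"

    state = "complete"                           # what the API returns
    assert state == DatasetState.Complete.value  # compare raw values: True
    assert state != DatasetState.Complete        # a member != its raw string
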
arkindex_worker/worker/task.py
CHANGED

@@ -4,9 +4,16 @@ BaseWorker methods for tasks.
 
 import uuid
 from collections.abc import Iterator
+from http.client import REQUEST_TIMEOUT
+from pathlib import Path
+
+import magic
+import requests
 
 from arkindex.compat import DownloadedFile
+from arkindex_worker import logger
 from arkindex_worker.models import Artifact
+from teklia_toolbox.requests import should_verify_cert
 
 
 class TaskMixin:

@@ -45,3 +52,49 @@ class TaskMixin:
         return self.api_client.request(
             "DownloadArtifact", id=task_id, path=artifact.path
         )
+
+    def upload_artifact(self, path: Path) -> None:
+        """
+        Upload a single file as an Artifact of the current task.
+
+        :param path: Path of the single file to upload as an Artifact.
+        """
+        assert path and isinstance(path, Path) and path.exists(), (
+            "path shouldn't be null, should be a Path and should exist"
+        )
+
+        if self.is_read_only:
+            logger.warning("Cannot upload artifact as this worker is in read-only mode")
+            return
+
+        # Get path relative to task's data directory
+        relpath = str(path.relative_to(self.work_dir))
+
+        # Get file size
+        size = path.stat().st_size
+
+        # Detect content type
+        try:
+            content_type = magic.from_file(path, mime=True)
+        except Exception as e:
+            logger.warning(f"Failed to get a mime type for {path}: {e}")
+            content_type = "application/octet-stream"
+
+        # Create artifact on API to get an S3 url
+        artifact = self.api_client.request(
+            "CreateArtifact",
+            id=self.task_id,
+            body={"path": relpath, "content_type": content_type, "size": size},
+        )
+
+        # Upload the file content to S3
+        s3_put_url = artifact["s3_put_url"]
+        with path.open("rb") as content:
+            resp = requests.put(
+                s3_put_url,
+                data=content,
+                headers={"Content-Type": content_type},
+                timeout=REQUEST_TIMEOUT,
+                verify=should_verify_cert(s3_put_url),
+            )
+            resp.raise_for_status()
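
For worker authors, a minimal usage sketch of the new upload_artifact() method inside a hypothetical DatasetWorker subclass (the file name and content are illustrative). The file must live under self.work_dir, since the method derives the artifact path with path.relative_to(self.work_dir):

    from pathlib import Path

    from arkindex_worker.worker import DatasetWorker


    class DemoWorker(DatasetWorker):
        def process_set(self, dataset_set):
            # ... run inference on the set, then publish the results
            # as an artifact of the current task.
            output = Path(self.work_dir) / "predictions.json"
            output.write_text('{"predictions": []}')
            self.upload_artifact(output)
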
tests/test_dataset_worker.py
CHANGED

@@ -435,34 +435,6 @@ def test_run_no_sets(mocker, caplog, mock_dataset_worker):
     ]
 
 
-def test_run_initial_dataset_state_error(
-    mocker, responses, caplog, mock_dataset_worker, default_dataset
-):
-    default_dataset.state = DatasetState.Building.value
-    mocker.patch(
-        "arkindex_worker.worker.DatasetWorker.list_sets",
-        return_value=[Set(name="train", dataset=default_dataset)],
-    )
-
-    with pytest.raises(SystemExit):
-        mock_dataset_worker.run()
-
-    assert len(responses.calls) == len(BASE_API_CALLS) * 2
-    assert [
-        (call.request.method, call.request.url) for call in responses.calls
-    ] == BASE_API_CALLS * 2
-
-    assert [(level, message) for _, level, message in caplog.record_tuples] == [
-        (logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
-        (logging.INFO, "Modern configuration is not available"),
-        (
-            logging.WARNING,
-            "Failed running worker on Set (train) from Dataset (dataset_id): AssertionError('When processing a set, its dataset state should be Complete.')",
-        ),
-        (logging.ERROR, "Ran on 1 set: 0 completed, 1 failed"),
-    ]
-
-
 def test_run_download_dataset_artifact_api_error(
     mocker,
     tmp_path,

@@ -570,16 +542,18 @@ def test_run_no_downloaded_dataset_artifact_error(
     ]
 
 
+@pytest.mark.parametrize("dataset_state", DatasetState)
 def test_run(
     mocker,
     tmp_path,
     responses,
     caplog,
+    dataset_state,
     mock_dataset_worker,
     default_dataset,
     default_artifact,
 ):
-    default_dataset.state = DatasetState.Complete.value
+    default_dataset.state = dataset_state.value
     mocker.patch(
         "arkindex_worker.worker.DatasetWorker.list_sets",
         return_value=[Set(name="train", dataset=default_dataset)],

@@ -590,55 +564,68 @@ def test_run(
     )
     mock_process = mocker.patch("arkindex_worker.worker.DatasetWorker.process_set")
 
-    archive_path = (
-        FIXTURES_DIR
-        / "extract_parent_archives"
-        / "first_parent"
-        / "arkindex_data.tar.zst"
-    )
-    responses.add(
-        responses.GET,
-        f"http://testserver/api/v1/task/{default_dataset.task_id}/artifacts/",
-        status=200,
-        json=[default_artifact],
-    )
-    responses.add(
-        responses.GET,
-        f"http://testserver/api/v1/task/{default_dataset.task_id}/artifact/dataset_id.tar.zst",
-        status=200,
-        body=archive_path.read_bytes(),
-        content_type="application/zstd",
-    )
+    if dataset_state == DatasetState.Complete:
+        archive_path = (
+            FIXTURES_DIR
+            / "extract_parent_archives"
+            / "first_parent"
+            / "arkindex_data.tar.zst"
+        )
+        responses.add(
+            responses.GET,
+            f"http://testserver/api/v1/task/{default_dataset.task_id}/artifacts/",
+            status=200,
+            json=[default_artifact],
+        )
+        responses.add(
+            responses.GET,
+            f"http://testserver/api/v1/task/{default_dataset.task_id}/artifact/dataset_id.tar.zst",
+            status=200,
+            body=archive_path.read_bytes(),
+            content_type="application/zstd",
+        )
 
     mock_dataset_worker.run()
 
     assert mock_process.call_count == 1
 
-    …
+    # We only download the dataset archive when it is Complete
+    extra_calls = []
+    if dataset_state == DatasetState.Complete:
+        extra_calls = [
+            (
+                "GET",
+                f"http://testserver/api/v1/task/{default_dataset.task_id}/artifacts/",
+            ),
+            (
+                "GET",
+                f"http://testserver/api/v1/task/{default_dataset.task_id}/artifact/dataset_id.tar.zst",
+            ),
+        ]
+
+    assert len(responses.calls) == len(BASE_API_CALLS) * 2 + len(extra_calls)
     assert [
         (call.request.method, call.request.url) for call in responses.calls
-    ] == BASE_API_CALLS * 2 + [
-        (
-            "GET",
-            f"http://testserver/api/v1/task/{default_dataset.task_id}/artifacts/",
-        ),
-        (
-            "GET",
-            f"http://testserver/api/v1/task/{default_dataset.task_id}/artifact/dataset_id.tar.zst",
-        ),
-    ]
+    ] == BASE_API_CALLS * 2 + extra_calls
 
-    assert [(level, message) for _, level, message in caplog.record_tuples] == [
+    logs = [
         (logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
         (logging.INFO, "Modern configuration is not available"),
         (
-            logging.INFO,
-            "Retrieving data for Set (train) from Dataset (dataset_id) (1/1)",
+            logging.WARNING,
+            f"The dataset Dataset (dataset_id) has its state set to `{dataset_state.value}`, its archive will not be downloaded",
         ),
-        (logging.INFO, "Downloading artifact for Dataset (dataset_id)"),
         (logging.INFO, "Processing Set (train) from Dataset (dataset_id) (1/1)"),
         (logging.INFO, "Ran on 1 set: 1 completed, 0 failed"),
     ]
+    if dataset_state == DatasetState.Complete:
+        logs[2] = (
+            logging.INFO,
+            "Retrieving data for Set (train) from Dataset (dataset_id) (1/1)",
+        )
+        logs.insert(3, (logging.INFO, "Downloading artifact for Dataset (dataset_id)"))
+
+    assert [(level, message) for _, level, message in caplog.record_tuples] == logs
 
 
 def test_run_read_only(
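
The new @pytest.mark.parametrize("dataset_state", DatasetState) decorator works because parametrize accepts any iterable and iterating an Enum class yields its members, so test_run now executes once per dataset state. A self-contained sketch of the pattern:

    import enum

    import pytest

    class Color(enum.Enum):
        RED = "red"
        BLUE = "blue"

    # Generates one test per member, e.g. test_lowercase[Color.RED].
    @pytest.mark.parametrize("color", Color)
    def test_lowercase(color):
        assert color.value == color.name.lower()
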
tests/test_elements_worker/test_task.py
CHANGED

@@ -1,6 +1,9 @@
+import tempfile
 import uuid
+from pathlib import Path
 
 import pytest
+from requests import HTTPError
 
 from arkindex.exceptions import ErrorResponse
 from arkindex_worker.models import Artifact

@@ -196,3 +199,112 @@ def test_download_artifact(
     ] == BASE_API_CALLS + [
         ("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst"),
     ]
+
+
+@pytest.mark.parametrize(
+    ("payload", "error"),
+    [
+        # Path
+        (
+            {"path": None},
+            "path shouldn't be null, should be a Path and should exist",
+        ),
+        (
+            {"path": "not path type"},
+            "path shouldn't be null, should be a Path and should exist",
+        ),
+        (
+            {"path": Path("i_do_no_exist.oops")},
+            "path shouldn't be null, should be a Path and should exist",
+        ),
+    ],
+)
+def test_upload_artifact_wrong_param_path(mock_dataset_worker, payload, error):
+    with pytest.raises(AssertionError, match=error):
+        mock_dataset_worker.upload_artifact(**payload)
+
+
+@pytest.fixture
+def tmp_file(mock_dataset_worker):
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".txt", dir=mock_dataset_worker.work_dir
+    ) as file:
+        file.write("Some content...")
+        file.seek(0)
+
+        yield Path(file.name)
+
+
+def test_upload_artifact_api_error(responses, mock_dataset_worker, tmp_file):
+    responses.add(
+        responses.POST,
+        "http://testserver/api/v1/task/my_task/artifacts/",
+        status=418,
+    )
+
+    with pytest.raises(ErrorResponse):
+        mock_dataset_worker.upload_artifact(path=tmp_file)
+
+    assert len(responses.calls) == len(BASE_API_CALLS) + 1
+    assert [
+        (call.request.method, call.request.url) for call in responses.calls
+    ] == BASE_API_CALLS + [("POST", "http://testserver/api/v1/task/my_task/artifacts/")]
+
+
+def test_upload_artifact_s3_upload_error(
+    responses,
+    mock_dataset_worker,
+    tmp_file,
+):
+    responses.add(
+        responses.POST,
+        "http://testserver/api/v1/task/my_task/artifacts/",
+        json={
+            "id": "11111111-1111-1111-1111-111111111111",
+            "path": tmp_file.name,
+            "size": 15,
+            "content_type": "text/plain",
+            "s3_put_url": "http://example.com/oops.txt",
+        },
+    )
+    responses.add(responses.PUT, "http://example.com/oops.txt", status=500)
+
+    with pytest.raises(HTTPError):
+        mock_dataset_worker.upload_artifact(path=tmp_file)
+
+    assert len(responses.calls) == len(BASE_API_CALLS) + 2
+    assert [
+        (call.request.method, call.request.url) for call in responses.calls
+    ] == BASE_API_CALLS + [
+        ("POST", "http://testserver/api/v1/task/my_task/artifacts/"),
+        ("PUT", "http://example.com/oops.txt"),
+    ]
+
+
+def test_upload_artifact(
+    responses,
+    mock_dataset_worker,
+    tmp_file,
+):
+    responses.add(
+        responses.POST,
+        "http://testserver/api/v1/task/my_task/artifacts/",
+        json={
+            "id": "11111111-1111-1111-1111-111111111111",
+            "path": tmp_file.name,
+            "size": 15,
+            "content_type": "text/plain",
+            "s3_put_url": "http://example.com/test.txt",
+        },
+    )
+    responses.add(responses.PUT, "http://example.com/test.txt")
+
+    mock_dataset_worker.upload_artifact(path=tmp_file)
+
+    assert len(responses.calls) == len(BASE_API_CALLS) + 2
+    assert [
+        (call.request.method, call.request.url) for call in responses.calls
+    ] == BASE_API_CALLS + [
+        ("POST", "http://testserver/api/v1/task/my_task/artifacts/"),
+        ("PUT", "http://example.com/test.txt"),
+    ]
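
The S3 failure test above relies on two behaviors: the responses library intercepts the outgoing PUT and returns the stubbed 500, and Response.raise_for_status() (called at the end of upload_artifact) raises HTTPError for any 4xx/5xx status. A standalone sketch:

    import requests
    import responses

    @responses.activate
    def demo():
        responses.add(responses.PUT, "http://example.com/oops.txt", status=500)
        resp = requests.put("http://example.com/oops.txt", data=b"payload", timeout=5)
        try:
            resp.raise_for_status()
        except requests.HTTPError as exc:
            print(f"upload failed: {exc}")  # 500 Server Error for url: ...

    demo()
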
{arkindex_base_worker-0.5.2a1.dist-info → arkindex_base_worker-0.5.2a2.dist-info}/licenses/LICENSE
RENAMED

File without changes

{arkindex_base_worker-0.5.2a1.dist-info → arkindex_base_worker-0.5.2a2.dist-info}/top_level.txt
RENAMED

File without changes