arkindex-base-worker 0.5.2a1__py3-none-any.whl → 0.5.2a2__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
arkindex_base_worker-0.5.2a2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: arkindex-base-worker
-Version: 0.5.2a1
+Version: 0.5.2a2
 Summary: Base Worker to easily build Arkindex ML workflows
 Author-email: Teklia <contact@teklia.com>
 Maintainer-email: Teklia <contact@teklia.com>
@@ -23,6 +23,7 @@ Requires-Dist: humanize==4.15.0
 Requires-Dist: peewee~=3.17
 Requires-Dist: Pillow==11.3.0
 Requires-Dist: python-gnupg==0.5.6
+Requires-Dist: python-magic==0.4.27
 Requires-Dist: shapely==2.0.6
 Requires-Dist: teklia-toolbox==0.1.12
 Requires-Dist: zstandard==0.25.0
arkindex_base_worker-0.5.2a2.dist-info/RECORD CHANGED
@@ -1,10 +1,10 @@
-arkindex_base_worker-0.5.2a1.dist-info/licenses/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
+arkindex_base_worker-0.5.2a2.dist-info/licenses/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
 arkindex_worker/__init__.py,sha256=Sdt5KXn8EgURb2MurYVrUWaHbH3iFA1XLRo0Lc5AJ44,250
 arkindex_worker/cache.py,sha256=XpEXMSnbhYCvrJquwA9XXqZo-ajMLpaCxKG5wH3Gp6Y,10959
-arkindex_worker/image.py,sha256=sGE8to5iykXv25bpkftOEWzlh5NzBZSKy4lSRoHYHPU,20929
+arkindex_worker/image.py,sha256=9KeZHWNIDkwNJZR0y-mbyD_pvKfrgdktMB32jZqSMYk,20927
 arkindex_worker/models.py,sha256=DgKvAB_2e1cPcuUavZkyTkV10jBK8y083oVklB9idSk,10855
 arkindex_worker/utils.py,sha256=Eqg5pGAuOmuwMT3EhKTQDMek7wHC1KzZL7XXqYVVfHY,10977
-arkindex_worker/worker/__init__.py,sha256=SzD0s1_m6gMV02EUF-NeciqZdVPA4dpXI84tSj-g494,17869
+arkindex_worker/worker/__init__.py,sha256=tM_ynAARmtuJw5YWb_jI0AD5KNXbWN1K-VDiixIp7O4,18009
 arkindex_worker/worker/base.py,sha256=-R_aLMJHbR6X1uM-U0zExsF_KLy5Wl3WJ_YMGO9We0I,22153
 arkindex_worker/worker/classification.py,sha256=qvykymkgd4nGywHCxL8obo4egstoGsmWNS4Ztc1qNWQ,11024
 arkindex_worker/worker/corpus.py,sha256=MeIMod7jkWyX0frtD0a37rhumnMV3p9ZOC1xwAoXrAA,2291
@@ -14,7 +14,7 @@ arkindex_worker/worker/entity.py,sha256=Aj6EOfzHEm7qQV-Egm0YKLZgCrLS_3ggOKTY81M2
 arkindex_worker/worker/image.py,sha256=L6Ikuf0Z0RxJk7JarY5PggJGrYSHLaPK0vn0dy0CIaQ,623
 arkindex_worker/worker/metadata.py,sha256=keZdOdUthSH2hAw9iet5pN7rzWihTUYjZHRGTEjaltw,6843
 arkindex_worker/worker/process.py,sha256=9TEHpMcBax1wc6PrWMMrdXe2uNfqyVj7n_dAYZRBGnY,1854
-arkindex_worker/worker/task.py,sha256=nYfMSFm_d-4t8y4PO4HjFBnLsZf7IsDjkS7-A2Pgnac,1525
+arkindex_worker/worker/task.py,sha256=HASQU5LYVtgvCnRCLFC6iH7h7v6q_usZNZ-r_Wkv9A8,3306
 arkindex_worker/worker/training.py,sha256=tyQOHcwv--_wdYz6CgLEe1YM7kwwwKN30LvGTsnWd78,10923
 arkindex_worker/worker/transcription.py,sha256=sw718R119tsLNY8inPMVeIilvFJo94fMbMtYgH0zTM8,21250
 examples/standalone/python/worker.py,sha256=Zr4s4pHvgexEjlkixLFYZp1UuwMLeoTxjyNG5_S2iYE,6672
@@ -24,7 +24,7 @@ tests/__init__.py,sha256=DG--S6IpGl399rzSAjDdHL76CkOIeZIjajCcyUSDhOQ,241
 tests/conftest.py,sha256=Tp7YFK17NATwF2yAcBwi0QFNyKSXtLS0VhZ-zZngsQI,24343
 tests/test_base_worker.py,sha256=lwS4X3atS2ktEKd1XdogmN3mbzq-tO206-k_0EDITlw,29302
 tests/test_cache.py,sha256=_wztzh94EwVrb8UvpFqgl2aa2_FLaCcJKaqunCYR5Dw,10435
-tests/test_dataset_worker.py,sha256=iDJM2C4PfQNH0r4_QqSWoPt8BcM0geUUdODtWY0Z9PA,22412
+tests/test_dataset_worker.py,sha256=LmL3ERF1__PUPkTLiAFC0IYglZTv5WQYA42Vm-uhe2w,22023
 tests/test_element.py,sha256=hlj5VSF4plwC7uz9R4LGOOXZJQcHZiYCIDZT5V6EIB8,14334
 tests/test_image.py,sha256=yAM5mMfpQcIurT1KLHmu0AhSX2Qm3YvCu7afyZ3XUdU,28314
 tests/test_merge.py,sha256=REpZ13jkq_qm_4L5URQgFy5lxvPZtXxQEiWfYLMdmF0,7956
@@ -44,7 +44,7 @@ tests/test_elements_worker/test_entity.py,sha256=SNAZEsVVLnqlliOmjkgv_cZhw0bAuJU
 tests/test_elements_worker/test_image.py,sha256=BljMNKgec_9a5bzNzFpYZIvSbuvwsWDfdqLHVJaTa7M,2079
 tests/test_elements_worker/test_metadata.py,sha256=qtTDtlp3VnBkfck7PAguK2dEgTLlr1i1EVnmNTeNf3A,20515
 tests/test_elements_worker/test_process.py,sha256=y4RoVhPfyHzR795fw7-_FXElBcKo3fy4Ew_HI-kxJic,3088
-tests/test_elements_worker/test_task.py,sha256=wTUWqN9UhfKmJn3IcFY75EW4I1ulRhisflmY1kmP47s,5574
+tests/test_elements_worker/test_task.py,sha256=oHwP1fbJftXFA2U4qA3Gb4vX-iJoV-sBvPHnfBBpRrc,8906
 tests/test_elements_worker/test_training.py,sha256=qgK7BLucddRzc8ePbQtY75x17QvGDEq5XCwgyyvmAJE,8717
 tests/test_elements_worker/test_transcription_create.py,sha256=yznO9B_BVsOR0Z_VY5ZL8gJp0ZPCz_4sPUs5dXtixAg,29281
 tests/test_elements_worker/test_transcription_create_with_elements.py,sha256=tmcyglgssEqMnt1Mdy_u6X1m2wgLWTo_HdWst3GrK2k,33056
@@ -55,7 +55,7 @@ worker-demo/tests/conftest.py,sha256=XzNMNeg6pmABUAH8jN6eZTlZSFGLYjS3-DTXjiRN6Yc
 worker-demo/tests/test_worker.py,sha256=3DLd4NRK4bfyatG5P_PK4k9P9tJHx9XQq5_ryFEEFVg,304
 worker-demo/worker_demo/__init__.py,sha256=2BPomV8ZMNf3YXJgloatKeHQCE6QOkwmsHGkO6MkQuM,125
 worker-demo/worker_demo/worker.py,sha256=Rt-DjWa5iBP08k58NDZMfeyPuFbtNcbX6nc5jFX7GNo,440
-arkindex_base_worker-0.5.2a1.dist-info/METADATA,sha256=AwYp_xJZzu6zAtvnvZjeK_W29tzqvRuwYnxwMYcKSIc,1849
-arkindex_base_worker-0.5.2a1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-arkindex_base_worker-0.5.2a1.dist-info/top_level.txt,sha256=-vNjP2VfROx0j83mdi9aIqRZ88eoJjxeWz-R_gPgyXU,49
-arkindex_base_worker-0.5.2a1.dist-info/RECORD,,
+arkindex_base_worker-0.5.2a2.dist-info/METADATA,sha256=LyPpeyvKIadAuqir1cymTwxoWm3XovhF-JmzQ1LW0MI,1885
+arkindex_base_worker-0.5.2a2.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
+arkindex_base_worker-0.5.2a2.dist-info/top_level.txt,sha256=-vNjP2VfROx0j83mdi9aIqRZ88eoJjxeWz-R_gPgyXU,49
+arkindex_base_worker-0.5.2a2.dist-info/RECORD,,
arkindex_base_worker-0.5.2a2.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.9.0)
+Generator: setuptools (80.10.1)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
arkindex_worker/image.py CHANGED
@@ -38,7 +38,7 @@ if TYPE_CHECKING:
     from arkindex_worker.models import Element
 
 # See http://docs.python-requests.org/en/master/user/advanced/#timeouts
-DOWNLOAD_TIMEOUT = (30, 60)
+REQUEST_TIMEOUT = (30, 60)
 
 BoundingBox = namedtuple("BoundingBox", ["x", "y", "width", "height"])
 
@@ -346,7 +346,7 @@ def _retried_request(url, *args, method=requests.get, **kwargs):
         url,
         *args,
         headers={"User-Agent": IIIF_USER_AGENT},
-        timeout=REQUEST_TIMEOUT,
+        timeout=REQUEST_TIMEOUT,
        verify=should_verify_cert(url),
         **kwargs,
     )
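For context on the rename above: requests interprets a 2-tuple `timeout` as separate connect and read timeouts, so only the constant's name changed, not its behaviour. A minimal illustration of the semantics (the URL is a placeholder, not taken from this diff):

```python
import requests

# requests reads a 2-tuple timeout as (connect timeout, read timeout), in seconds:
# up to 30s to establish the connection, then up to 60s between received bytes.
REQUEST_TIMEOUT = (30, 60)

# Placeholder IIIF-style URL, for illustration only.
resp = requests.get(
    "https://example.com/iiif/image/full/full/0/default.jpg",
    timeout=REQUEST_TIMEOUT,
)
resp.raise_for_status()
```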
arkindex_worker/worker/__init__.py CHANGED
@@ -424,12 +424,13 @@ class DatasetWorker(DatasetMixin, BaseWorker, TaskMixin):
         failed = 0
         for i, dataset_set in enumerate(dataset_sets, start=1):
            try:
-                assert dataset_set.dataset.state == DatasetState.Complete.value, (
-                    "When processing a set, its dataset state should be Complete."
-                )
-
-                logger.info(f"Retrieving data for {dataset_set} ({i}/{count})")
-                self.download_dataset_artifact(dataset_set.dataset)
+                if dataset_set.dataset.state == DatasetState.Complete.value:
+                    logger.info(f"Retrieving data for {dataset_set} ({i}/{count})")
+                    self.download_dataset_artifact(dataset_set.dataset)
+                else:
+                    logger.warning(
+                        f"The dataset {dataset_set.dataset} has its state set to `{dataset_set.dataset.state}`, its archive will not be downloaded"
+                    )
 
                 logger.info(f"Processing {dataset_set} ({i}/{count})")
                 self.process_set(dataset_set)
@@ -444,7 +445,7 @@ class DatasetWorker(DatasetMixin, BaseWorker, TaskMixin):
 
                 logger.warning(message, exc_info=e if self.args.verbose else None)
 
-            # Cleanup the latest downloaded dataset artifact
+            # Cleanup the latest downloaded dataset artifact (if needed)
            self.cleanup_downloaded_artifact()
 
        message = f"Ran on {count} {pluralize('set', count)}: {count - failed} completed, {failed} failed"
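The behavioural change in this hunk: a set whose dataset is not Complete no longer aborts with an AssertionError; the worker logs a warning, skips the archive download, and still processes the set. A standalone sketch of the new control flow, with hypothetical state values and stub callables standing in for the real worker:

```python
from enum import Enum


class DatasetState(Enum):
    # Hypothetical values, for illustration only; only Complete matters here.
    Building = "building"
    Complete = "complete"


def handle_set(state: str, download, process, warn) -> None:
    # New in 0.5.2a2: a non-Complete dataset is no longer an error...
    if state == DatasetState.Complete.value:
        download()
    else:
        warn(f"state `{state}`: the archive will not be downloaded")
    # ...and the set is processed either way.
    process()


# A Building dataset now warns instead of raising.
handle_set(
    DatasetState.Building.value,
    download=lambda: None,
    process=lambda: None,
    warn=print,
)
```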
arkindex_worker/worker/task.py CHANGED
@@ -4,9 +4,16 @@ BaseWorker methods for tasks.
 
 import uuid
 from collections.abc import Iterator
+from http.client import REQUEST_TIMEOUT
+from pathlib import Path
+
+import magic
+import requests
 
 from arkindex.compat import DownloadedFile
+from arkindex_worker import logger
 from arkindex_worker.models import Artifact
+from teklia_toolbox.requests import should_verify_cert
 
 
 class TaskMixin:
@@ -45,3 +52,49 @@
         return self.api_client.request(
             "DownloadArtifact", id=task_id, path=artifact.path
         )
+
+    def upload_artifact(self, path: Path) -> None:
+        """
+        Upload a single file as an Artifact of the current task.
+
+        :param path: Path of the single file to upload as an Artifact.
+        """
+        assert path and isinstance(path, Path) and path.exists(), (
+            "path shouldn't be null, should be a Path and should exist"
+        )
+
+        if self.is_read_only:
+            logger.warning("Cannot upload artifact as this worker is in read-only mode")
+            return
+
+        # Get path relative to task's data directory
+        relpath = str(path.relative_to(self.work_dir))
+
+        # Get file size
+        size = path.stat().st_size
+
+        # Detect content type
+        try:
+            content_type = magic.from_file(path, mime=True)
+        except Exception as e:
+            logger.warning(f"Failed to get a mime type for {path}: {e}")
+            content_type = "application/octet-stream"
+
+        # Create artifact on API to get an S3 url
+        artifact = self.api_client.request(
+            "CreateArtifact",
+            id=self.task_id,
+            body={"path": relpath, "content_type": content_type, "size": size},
+        )
+
+        # Upload the file content to S3
+        s3_put_url = artifact["s3_put_url"]
+        with path.open("rb") as content:
+            resp = requests.put(
+                s3_put_url,
+                data=content,
+                headers={"Content-Type": content_type},
+                timeout=REQUEST_TIMEOUT,
+                verify=should_verify_cert(s3_put_url),
+            )
+        resp.raise_for_status()
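The new `TaskMixin.upload_artifact` detects the file's MIME type with python-magic (hence the new `python-magic` dependency in METADATA), registers the artifact through the `CreateArtifact` endpoint, then PUTs the bytes to the returned `s3_put_url`. A hypothetical usage sketch — the `DemoWorker` subclass, file name, and content are invented, and it assumes the worker runs inside an Arkindex task (not read-only) with the file stored under `self.work_dir`:

```python
from pathlib import Path

from arkindex_worker.worker import DatasetWorker


class DemoWorker(DatasetWorker):  # hypothetical subclass, for illustration
    def process_set(self, dataset_set):
        # Results must live under the task's working directory, since
        # upload_artifact() builds the artifact path relative to work_dir.
        results = Path(self.work_dir) / "predictions.json"
        results.write_text("{}")
        # Creates the artifact via the API, then uploads the bytes to S3.
        self.upload_artifact(results)
```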
tests/test_dataset_worker.py CHANGED
@@ -435,34 +435,6 @@ def test_run_no_sets(mocker, caplog, mock_dataset_worker):
     ]
 
 
-def test_run_initial_dataset_state_error(
-    mocker, responses, caplog, mock_dataset_worker, default_dataset
-):
-    default_dataset.state = DatasetState.Building.value
-    mocker.patch(
-        "arkindex_worker.worker.DatasetWorker.list_sets",
-        return_value=[Set(name="train", dataset=default_dataset)],
-    )
-
-    with pytest.raises(SystemExit):
-        mock_dataset_worker.run()
-
-    assert len(responses.calls) == len(BASE_API_CALLS) * 2
-    assert [
-        (call.request.method, call.request.url) for call in responses.calls
-    ] == BASE_API_CALLS * 2
-
-    assert [(level, message) for _, level, message in caplog.record_tuples] == [
-        (logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
-        (logging.INFO, "Modern configuration is not available"),
-        (
-            logging.WARNING,
-            "Failed running worker on Set (train) from Dataset (dataset_id): AssertionError('When processing a set, its dataset state should be Complete.')",
-        ),
-        (logging.ERROR, "Ran on 1 set: 0 completed, 1 failed"),
-    ]
-
-
 def test_run_download_dataset_artifact_api_error(
     mocker,
     tmp_path,
@@ -570,16 +542,18 @@ def test_run_no_downloaded_dataset_artifact_error(
     ]
 
 
+@pytest.mark.parametrize("dataset_state", DatasetState)
 def test_run(
     mocker,
     tmp_path,
     responses,
     caplog,
+    dataset_state,
     mock_dataset_worker,
     default_dataset,
     default_artifact,
 ):
-    default_dataset.state = DatasetState.Complete.value
+    default_dataset.state = dataset_state.value
     mocker.patch(
         "arkindex_worker.worker.DatasetWorker.list_sets",
         return_value=[Set(name="train", dataset=default_dataset)],
@@ -590,55 +564,68 @@
     )
     mock_process = mocker.patch("arkindex_worker.worker.DatasetWorker.process_set")
 
-    archive_path = (
-        FIXTURES_DIR
-        / "extract_parent_archives"
-        / "first_parent"
-        / "arkindex_data.tar.zst"
-    )
-    responses.add(
-        responses.GET,
-        f"http://testserver/api/v1/task/{default_dataset.task_id}/artifacts/",
-        status=200,
-        json=[default_artifact],
-    )
-    responses.add(
-        responses.GET,
-        f"http://testserver/api/v1/task/{default_dataset.task_id}/artifact/dataset_id.tar.zst",
-        status=200,
-        body=archive_path.read_bytes(),
-        content_type="application/zstd",
-    )
+    if dataset_state == DatasetState.Complete:
+        archive_path = (
+            FIXTURES_DIR
+            / "extract_parent_archives"
+            / "first_parent"
+            / "arkindex_data.tar.zst"
+        )
+        responses.add(
+            responses.GET,
+            f"http://testserver/api/v1/task/{default_dataset.task_id}/artifacts/",
+            status=200,
+            json=[default_artifact],
+        )
+        responses.add(
+            responses.GET,
+            f"http://testserver/api/v1/task/{default_dataset.task_id}/artifact/dataset_id.tar.zst",
+            status=200,
+            body=archive_path.read_bytes(),
+            content_type="application/zstd",
+        )
 
     mock_dataset_worker.run()
 
     assert mock_process.call_count == 1
 
-    assert len(responses.calls) == len(BASE_API_CALLS) * 2 + 2
+    # We only download the dataset archive when it is Complete
+    extra_calls = []
+    if dataset_state == DatasetState.Complete:
+        extra_calls = [
+            (
+                "GET",
+                f"http://testserver/api/v1/task/{default_dataset.task_id}/artifacts/",
+            ),
+            (
+                "GET",
+                f"http://testserver/api/v1/task/{default_dataset.task_id}/artifact/dataset_id.tar.zst",
+            ),
+        ]
+
+    assert len(responses.calls) == len(BASE_API_CALLS) * 2 + len(extra_calls)
     assert [
         (call.request.method, call.request.url) for call in responses.calls
-    ] == BASE_API_CALLS * 2 + [
-        (
-            "GET",
-            f"http://testserver/api/v1/task/{default_dataset.task_id}/artifacts/",
-        ),
-        (
-            "GET",
-            f"http://testserver/api/v1/task/{default_dataset.task_id}/artifact/dataset_id.tar.zst",
-        ),
-    ]
+    ] == BASE_API_CALLS * 2 + extra_calls
 
-    assert [(level, message) for _, level, message in caplog.record_tuples] == [
+    logs = [
         (logging.INFO, "Loaded Worker Fake worker @ 123412 from API"),
         (logging.INFO, "Modern configuration is not available"),
         (
-            logging.INFO,
-            "Retrieving data for Set (train) from Dataset (dataset_id) (1/1)",
+            logging.WARNING,
+            f"The dataset Dataset (dataset_id) has its state set to `{dataset_state.value}`, its archive will not be downloaded",
         ),
-        (logging.INFO, "Downloading artifact for Dataset (dataset_id)"),
         (logging.INFO, "Processing Set (train) from Dataset (dataset_id) (1/1)"),
         (logging.INFO, "Ran on 1 set: 1 completed, 0 failed"),
     ]
+    if dataset_state == DatasetState.Complete:
+        logs[2] = (
+            logging.INFO,
+            "Retrieving data for Set (train) from Dataset (dataset_id) (1/1)",
+        )
+        logs.insert(3, (logging.INFO, "Downloading artifact for Dataset (dataset_id)"))
+
+    assert [(level, message) for _, level, message in caplog.record_tuples] == logs
 
 
 def test_run_read_only(
tests/test_elements_worker/test_task.py CHANGED
@@ -1,6 +1,9 @@
+import tempfile
 import uuid
+from pathlib import Path
 
 import pytest
+from requests import HTTPError
 
 from arkindex.exceptions import ErrorResponse
 from arkindex_worker.models import Artifact
@@ -196,3 +199,112 @@ def test_download_artifact(
     ] == BASE_API_CALLS + [
         ("GET", f"http://testserver/api/v1/task/{TASK_ID}/artifact/dataset_id.tar.zst"),
     ]
+
+
+@pytest.mark.parametrize(
+    ("payload", "error"),
+    [
+        # Path
+        (
+            {"path": None},
+            "path shouldn't be null, should be a Path and should exist",
+        ),
+        (
+            {"path": "not path type"},
+            "path shouldn't be null, should be a Path and should exist",
+        ),
+        (
+            {"path": Path("i_do_no_exist.oops")},
+            "path shouldn't be null, should be a Path and should exist",
+        ),
+    ],
+)
+def test_upload_artifact_wrong_param_path(mock_dataset_worker, payload, error):
+    with pytest.raises(AssertionError, match=error):
+        mock_dataset_worker.upload_artifact(**payload)
+
+
+@pytest.fixture
+def tmp_file(mock_dataset_worker):
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".txt", dir=mock_dataset_worker.work_dir
+    ) as file:
+        file.write("Some content...")
+        file.seek(0)
+
+        yield Path(file.name)
+
+
+def test_upload_artifact_api_error(responses, mock_dataset_worker, tmp_file):
+    responses.add(
+        responses.POST,
+        "http://testserver/api/v1/task/my_task/artifacts/",
+        status=418,
+    )
+
+    with pytest.raises(ErrorResponse):
+        mock_dataset_worker.upload_artifact(path=tmp_file)
+
+    assert len(responses.calls) == len(BASE_API_CALLS) + 1
+    assert [
+        (call.request.method, call.request.url) for call in responses.calls
+    ] == BASE_API_CALLS + [("POST", "http://testserver/api/v1/task/my_task/artifacts/")]
+
+
+def test_upload_artifact_s3_upload_error(
+    responses,
+    mock_dataset_worker,
+    tmp_file,
+):
+    responses.add(
+        responses.POST,
+        "http://testserver/api/v1/task/my_task/artifacts/",
+        json={
+            "id": "11111111-1111-1111-1111-111111111111",
+            "path": tmp_file.name,
+            "size": 15,
+            "content_type": "text/plain",
+            "s3_put_url": "http://example.com/oops.txt",
+        },
+    )
+    responses.add(responses.PUT, "http://example.com/oops.txt", status=500)
+
+    with pytest.raises(HTTPError):
+        mock_dataset_worker.upload_artifact(path=tmp_file)
+
+    assert len(responses.calls) == len(BASE_API_CALLS) + 2
+    assert [
+        (call.request.method, call.request.url) for call in responses.calls
+    ] == BASE_API_CALLS + [
+        ("POST", "http://testserver/api/v1/task/my_task/artifacts/"),
+        ("PUT", "http://example.com/oops.txt"),
+    ]
+
+
+def test_upload_artifact(
+    responses,
+    mock_dataset_worker,
+    tmp_file,
+):
+    responses.add(
+        responses.POST,
+        "http://testserver/api/v1/task/my_task/artifacts/",
+        json={
+            "id": "11111111-1111-1111-1111-111111111111",
+            "path": tmp_file.name,
+            "size": 15,
+            "content_type": "text/plain",
+            "s3_put_url": "http://example.com/test.txt",
+        },
+    )
+    responses.add(responses.PUT, "http://example.com/test.txt")
+
+    mock_dataset_worker.upload_artifact(path=tmp_file)
+
+    assert len(responses.calls) == len(BASE_API_CALLS) + 2
+    assert [
+        (call.request.method, call.request.url) for call in responses.calls
+    ] == BASE_API_CALLS + [
+        ("POST", "http://testserver/api/v1/task/my_task/artifacts/"),
+        ("PUT", "http://example.com/test.txt"),
+    ]