fractal-server 2.14.0a19__py3-none-any.whl → 2.14.0a21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/routes/api/v2/history.py +43 -1
- fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py +46 -23
- fractal_server/app/runner/v2/db_tools.py +19 -0
- fractal_server/app/runner/v2/runner.py +14 -0
- {fractal_server-2.14.0a19.dist-info → fractal_server-2.14.0a21.dist-info}/METADATA +1 -1
- {fractal_server-2.14.0a19.dist-info → fractal_server-2.14.0a21.dist-info}/RECORD +10 -10
- {fractal_server-2.14.0a19.dist-info → fractal_server-2.14.0a21.dist-info}/LICENSE +0 -0
- {fractal_server-2.14.0a19.dist-info → fractal_server-2.14.0a21.dist-info}/WHEEL +0 -0
- {fractal_server-2.14.0a19.dist-info → fractal_server-2.14.0a21.dist-info}/entry_points.txt +0 -0
fractal_server/__init__.py
CHANGED
@@ -1 +1 @@
-__VERSION__ = "2.14.0a19"
+__VERSION__ = "2.14.0a21"
fractal_server/app/routes/api/v2/history.py
CHANGED
@@ -41,6 +41,24 @@ from fractal_server.images.tools import merge_type_filters
 from fractal_server.logger import set_logger


+def check_historyrun_related_to_dataset_and_wftask(
+    history_run: HistoryRun,
+    dataset_id: int,
+    workflowtask_id: int,
+):
+    if (
+        history_run.dataset_id != dataset_id
+        or history_run.workflowtask_id != workflowtask_id
+    ):
+        raise HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+            detail=(
+                f"Invalid query parameters: HistoryRun[{history_run.id}] is "
+                f"not related to {dataset_id=} and {workflowtask_id=}."
+            ),
+        )
+
+
 class ImageWithStatusPage(PaginationResponse[SingleImageWithStatus]):

     attributes: dict[str, list[Any]]
@@ -199,7 +217,14 @@ async def get_history_run_units(
     )

     # Check that `HistoryRun` exists
-    await get_history_run_or_404(history_run_id=history_run_id, db=db)
+    history_run = await get_history_run_or_404(
+        history_run_id=history_run_id, db=db
+    )
+    check_historyrun_related_to_dataset_and_wftask(
+        history_run=history_run,
+        dataset_id=dataset_id,
+        workflowtask_id=workflowtask_id,
+    )

     # Count `HistoryUnit`s
     stmt = select(func.count(HistoryUnit.id)).where(
@@ -450,6 +475,23 @@ async def get_history_unit_log(
         db=db,
     )

+    if history_unit.history_run_id != history_run_id:
+        raise HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+            detail=(
+                f"Invalid query parameters: HistoryUnit[{history_unit_id}] "
+                f"is not related to HistoryRun[{history_run_id}]"
+            ),
+        )
+    history_run = await get_history_run_or_404(
+        history_run_id=history_run_id, db=db
+    )
+    check_historyrun_related_to_dataset_and_wftask(
+        history_run=history_run,
+        dataset_id=dataset_id,
+        workflowtask_id=workflowtask_id,
+    )
+
     # Get log or placeholder text
     log = read_log_file(
         logfile=history_unit.logfile,
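The hunks above add a cross-consistency check between `HistoryRun`, `dataset_id`, and `workflowtask_id`. The snippet below is a minimal, self-contained sketch of that behavior: a plain dataclass stands in for the actual `HistoryRun` model (only `fastapi` is required), and a mismatch raises a 422.

```python
# Minimal sketch of the new check; `FakeHistoryRun` is a stand-in, not the
# fractal-server model.
from dataclasses import dataclass

from fastapi import HTTPException, status


@dataclass
class FakeHistoryRun:
    id: int
    dataset_id: int
    workflowtask_id: int


def check_historyrun_related_to_dataset_and_wftask(
    history_run: FakeHistoryRun,
    dataset_id: int,
    workflowtask_id: int,
):
    # Same logic as the helper added in history.py above
    if (
        history_run.dataset_id != dataset_id
        or history_run.workflowtask_id != workflowtask_id
    ):
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"HistoryRun[{history_run.id}] is not related to "
            f"{dataset_id=} and {workflowtask_id=}.",
        )


run = FakeHistoryRun(id=1, dataset_id=10, workflowtask_id=20)
check_historyrun_related_to_dataset_and_wftask(run, dataset_id=10, workflowtask_id=20)
try:
    check_historyrun_related_to_dataset_and_wftask(run, dataset_id=10, workflowtask_id=99)
except HTTPException as exc:
    print(exc.status_code)  # 422
```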
fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py
CHANGED
@@ -23,6 +23,9 @@ from fractal_server.app.runner.filenames import SHUTDOWN_FILENAME
 from fractal_server.app.runner.task_files import MULTISUBMIT_PREFIX
 from fractal_server.app.runner.task_files import SUBMIT_PREFIX
 from fractal_server.app.runner.task_files import TaskFiles
+from fractal_server.app.runner.v2.db_tools import (
+    bulk_update_status_of_history_unit,
+)
 from fractal_server.app.runner.v2.db_tools import (
     update_logfile_of_history_unit,
 )
@@ -47,6 +50,7 @@ class BaseSlurmRunner(BaseRunner):
     root_dir_local: Path
     root_dir_remote: Path
     poll_interval: int
+    poll_interval_internal: float
     jobs: dict[str, SlurmJob]
     python_worker_interpreter: str
     slurm_runner_type: Literal["ssh", "sudo"]
@@ -72,6 +76,8 @@ class BaseSlurmRunner(BaseRunner):
         self.poll_interval = (
             poll_interval or settings.FRACTAL_SLURM_POLL_INTERVAL
         )
+        self.poll_interval_internal = self.poll_interval / 10.0
+
         self.check_fractal_server_versions()

         # Create job folders. Note that the local one may or may not exist
@@ -405,6 +411,33 @@ class BaseSlurmRunner(BaseRunner):
     def job_ids(self) -> list[str]:
         return list(self.jobs.keys())

+    def wait_and_check_shutdown(self) -> list[str]:
+        """
+        Wait at most `self.poll_interval`, while also checking for shutdown.
+        """
+        # Sleep for `self.poll_interval`, but keep checking for shutdowns
+        start_time = time.perf_counter()
+        max_time = start_time + self.poll_interval
+        can_return = False
+        logger.debug(
+            "[wait_and_check_shutdown] "
+            f"I will wait at most {self.poll_interval} s, "
+            f"in blocks of {self.poll_interval_internal} s."
+        )
+
+        while (time.perf_counter() < max_time) or (can_return is False):
+            # Handle shutdown
+            if self.is_shutdown():
+                logger.info("[wait_and_check_shutdown] Shutdown file detected")
+                scancelled_job_ids = self.scancel_jobs()
+                logger.info(f"[wait_and_check_shutdown] {scancelled_job_ids=}")
+                return scancelled_job_ids
+            can_return = True
+            time.sleep(self.poll_interval_internal)
+
+        logger.debug("[wait_and_check_shutdown] No shutdown file detected")
+        return []
+
     def submit(
         self,
         func: callable,
@@ -491,15 +524,9 @@ class BaseSlurmRunner(BaseRunner):

         # Retrieval phase
         logger.info("[submit] START retrieval phase")
+        scancelled_job_ids = []
         while len(self.jobs) > 0:

-            # Handle shutdown
-            scancelled_job_ids = []
-            if self.is_shutdown():
-                logger.info("[submit] Shutdown file detected")
-                scancelled_job_ids = self.scancel_jobs()
-                logger.info(f"[submit] {scancelled_job_ids=}")
-
             # Look for finished jobs
             finished_job_ids = self._get_finished_jobs(job_ids=self.job_ids)
             logger.debug(f"[submit] {finished_job_ids=}")
@@ -531,7 +558,8 @@ class BaseSlurmRunner(BaseRunner):
                         db_sync=db,
                     )

-
+            if len(self.jobs) > 0:
+                scancelled_job_ids = self.wait_and_check_shutdown()

         logger.info("[submit] END")
         return result, exception
@@ -554,13 +582,11 @@ class BaseSlurmRunner(BaseRunner):
         if self.is_shutdown():
             if task_type == "parallel":
                 with next(get_sync_db()) as db:
-
-
-
-
-
-                        db_sync=db,
-                    )
+                    bulk_update_status_of_history_unit(
+                        history_unit_ids=history_unit_ids,
+                        status=HistoryUnitStatus.FAILED,
+                        db_sync=db,
+                    )
             results = {}
             exceptions = {
                 ind: SHUTDOWN_EXCEPTION for ind in range(len(list_parameters))
@@ -687,17 +713,11 @@ class BaseSlurmRunner(BaseRunner):
         logger.info("[multisubmit] START retrieval phase")
         while len(self.jobs) > 0:

-            # Handle shutdown
-            scancelled_job_ids = []
-            if self.is_shutdown():
-                logger.info("[multisubmit] Shutdown file detected")
-                scancelled_job_ids = self.scancel_jobs()
-                logger.info(f"[multisubmit] {scancelled_job_ids=}")
-
             # Look for finished jobs
             finished_job_ids = self._get_finished_jobs(job_ids=self.job_ids)
             logger.debug(f"[multisubmit] {finished_job_ids=}")

+            scancelled_job_ids = []
             with next(get_sync_db()) as db:
                 for slurm_job_id in finished_job_ids:
                     logger.info(f"[multisubmit] Now process {slurm_job_id=}")
@@ -737,7 +757,10 @@ class BaseSlurmRunner(BaseRunner):
                             db_sync=db,
                         )

-
+            if len(self.jobs) > 0:
+                scancelled_job_ids = self.wait_and_check_shutdown()
+
+        logger.info("[multisubmit] END")
         return results, exceptions

     def check_fractal_server_versions(self) -> None:
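The net effect of the `base_slurm_runner.py` hunks: the retrieval loops in `submit` and `multisubmit` no longer check for shutdown at the top of every iteration; when jobs are still pending they call the new `wait_and_check_shutdown`, which sleeps in short blocks of `poll_interval / 10` so that a shutdown file is noticed well within one SLURM poll interval. Below is a standalone sketch of that waiting pattern, with illustrative names and a hypothetical shutdown-file path, not the actual `BaseSlurmRunner` API.

```python
# Illustrative sketch of the "sleep in small blocks while watching for a
# shutdown file" pattern; names and the shutdown-file path are assumptions.
import time
from pathlib import Path

POLL_INTERVAL = 5.0
POLL_INTERVAL_INTERNAL = POLL_INTERVAL / 10.0  # mirrors `poll_interval / 10.0`
SHUTDOWN_FILE = Path("/tmp/example-shutdown-file")  # hypothetical path


def wait_and_check_shutdown() -> bool:
    """Wait at most POLL_INTERVAL; return True if a shutdown was requested."""
    deadline = time.perf_counter() + POLL_INTERVAL
    while time.perf_counter() < deadline:
        if SHUTDOWN_FILE.exists():  # stands in for `self.is_shutdown()`
            return True  # the real method also cancels SLURM jobs and returns their IDs
        time.sleep(POLL_INTERVAL_INTERNAL)
    return False


# Usage sketch: process finished jobs, then wait between polls.
pending_jobs = {"123": "job-object"}
while len(pending_jobs) > 0:
    # ... process finished jobs, popping them from `pending_jobs` ...
    pending_jobs.clear()  # stand-in for actual processing
    if len(pending_jobs) > 0 and wait_and_check_shutdown():
        break
```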
fractal_server/app/runner/v2/db_tools.py
CHANGED
@@ -2,6 +2,7 @@ from typing import Any

 from sqlalchemy.dialects.postgresql import insert as pg_insert
 from sqlalchemy.orm import Session
+from sqlmodel import update

 from fractal_server.app.db import get_sync_db
 from fractal_server.app.models.v2 import HistoryImageCache
@@ -40,6 +41,24 @@ def update_status_of_history_unit(
     db_sync.commit()


+def bulk_update_status_of_history_unit(
+    *,
+    history_unit_ids: list[int],
+    status: HistoryUnitStatus,
+    db_sync: Session,
+) -> None:
+    for ind in range(0, len(history_unit_ids), _CHUNK_SIZE):
+        db_sync.execute(
+            update(HistoryUnit)
+            .where(
+                HistoryUnit.id.in_(history_unit_ids[ind : ind + _CHUNK_SIZE])
+            )
+            .values(status=status)
+        )
+        # NOTE: keeping commit within the for loop is much more efficient
+        db_sync.commit()
+
+
 def update_logfile_of_history_unit(
     *,
     history_unit_id: int,
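`bulk_update_status_of_history_unit` replaces per-unit updates with chunked `UPDATE ... WHERE id IN (...)` statements. The following self-contained sketch reproduces the pattern against a toy table and an in-memory SQLite database; the chunk size and the `Unit` model are assumptions, whereas the real helper targets `HistoryUnit` and reuses the caller's session.

```python
# Rough, self-contained illustration of the chunked bulk-UPDATE pattern.
from typing import Optional

from sqlmodel import Field, Session, SQLModel, create_engine, update

_CHUNK_SIZE = 400  # assumption: the actual fractal-server value may differ


class Unit(SQLModel, table=True):
    id: Optional[int] = Field(default=None, primary_key=True)
    status: str = "submitted"


engine = create_engine("sqlite://")
SQLModel.metadata.create_all(engine)

with Session(engine) as db_sync:
    db_sync.add_all(Unit() for _ in range(1000))
    db_sync.commit()

    unit_ids = list(range(1, 1001))
    for ind in range(0, len(unit_ids), _CHUNK_SIZE):
        db_sync.execute(
            update(Unit)
            .where(Unit.id.in_(unit_ids[ind : ind + _CHUNK_SIZE]))
            .values(status="failed")
        )
        # Commit per chunk, as in the new helper
        db_sync.commit()
```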
fractal_server/app/runner/v2/runner.py
CHANGED
@@ -8,6 +8,7 @@ from typing import Literal
 from typing import Optional

 from sqlalchemy.orm.attributes import flag_modified
+from sqlmodel import delete

 from ....images import SingleImage
 from ....images.tools import filter_image_list
@@ -21,6 +22,7 @@ from .task_interface import TaskOutput
 from fractal_server.app.db import get_sync_db
 from fractal_server.app.models.v2 import AccountingRecord
 from fractal_server.app.models.v2 import DatasetV2
+from fractal_server.app.models.v2 import HistoryImageCache
 from fractal_server.app.models.v2 import HistoryRun
 from fractal_server.app.models.v2 import TaskGroupV2
 from fractal_server.app.models.v2 import WorkflowTaskV2
@@ -331,6 +333,18 @@ def execute_tasks_v2(
             db_dataset.images = tmp_images
             flag_modified(db_dataset, "images")
             db.merge(db_dataset)
+
+            db.execute(
+                delete(HistoryImageCache)
+                .where(HistoryImageCache.dataset_id == dataset.id)
+                .where(HistoryImageCache.workflowtask_id == wftask.id)
+                .where(
+                    HistoryImageCache.zarr_url.in_(
+                        current_task_output.image_list_removals
+                    )
+                )
+            )
+
             db.commit()
             db.close()  # FIXME: why is this needed?
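The `HistoryImageCache` deletion above keeps the cache consistent with `image_list_removals`. Below is a rough, self-contained sketch of the same `sqlmodel.delete(...)` pattern, using a toy table and an in-memory SQLite engine; the real code operates on the fractal-server `HistoryImageCache` model within an existing session.

```python
# Toy model and in-memory DB, only to illustrate the chained-`where` delete.
from sqlmodel import Field, Session, SQLModel, create_engine, delete


class ImageCache(SQLModel, table=True):
    zarr_url: str = Field(primary_key=True)
    dataset_id: int = Field(primary_key=True)
    workflowtask_id: int = Field(primary_key=True)


engine = create_engine("sqlite://")
SQLModel.metadata.create_all(engine)

with Session(engine) as db:
    db.add(ImageCache(zarr_url="/zarr/a", dataset_id=1, workflowtask_id=2))
    db.add(ImageCache(zarr_url="/zarr/b", dataset_id=1, workflowtask_id=2))
    db.commit()

    image_list_removals = ["/zarr/a"]
    db.execute(
        delete(ImageCache)
        .where(ImageCache.dataset_id == 1)
        .where(ImageCache.workflowtask_id == 2)
        .where(ImageCache.zarr_url.in_(image_list_removals))
    )
    db.commit()
```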
{fractal_server-2.14.0a19.dist-info → fractal_server-2.14.0a21.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-fractal_server/__init__.py,sha256=
+fractal_server/__init__.py,sha256=X5Dy_f87GBiFeLzs2riLgudM2HP43U0ZuXNsU2NF7Os,26
 fractal_server/__main__.py,sha256=rkM8xjY1KeS3l63irB8yCrlVobR-73uDapC4wvrIlxI,6957
 fractal_server/alembic.ini,sha256=MWwi7GzjzawI9cCAK1LW7NxIBQDUqD12-ptJoq5JpP0,3153
 fractal_server/app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -36,7 +36,7 @@ fractal_server/app/routes/api/v2/_aux_functions_history.py,sha256=ZlI6nwzB5r9AiY
 fractal_server/app/routes/api/v2/_aux_functions_task_lifecycle.py,sha256=qdXCb6IP8-qPEAxGZKljtjIqNzIAyRaAsQSRi5VqFHM,6773
 fractal_server/app/routes/api/v2/_aux_functions_tasks.py,sha256=uhNSs-jcS7ndIUFKiOC1yrDiViw3uvKEXi9UL04BMks,11642
 fractal_server/app/routes/api/v2/dataset.py,sha256=h5AhE0sdhQ20ZlIbEJsFnHIOUW0S1VHFpoflpBkVScs,8936
-fractal_server/app/routes/api/v2/history.py,sha256=
+fractal_server/app/routes/api/v2/history.py,sha256=lMbaybooBzzbCgD9vdzPyNxdgAZuzCH_YrW9ost-UgI,17253
 fractal_server/app/routes/api/v2/images.py,sha256=BGpO94gVd8BTpCN6Mun2RXmjrPmfkIp73m8RN7uiGW4,8361
 fractal_server/app/routes/api/v2/job.py,sha256=MU1sHIKk_89WrD0TD44d4ufzqnywot7On_W71KjyUbQ,6500
 fractal_server/app/routes/api/v2/project.py,sha256=uAZgATiHcOvbnRX-vv1D3HoaEUvLUd7vzVmGcqOP8ZY,4602
@@ -78,7 +78,7 @@ fractal_server/app/runner/executors/slurm_common/__init__.py,sha256=47DEQpj8HBSa
 fractal_server/app/runner/executors/slurm_common/_batching.py,sha256=ZY020JZlDS5mfpgpWTChQkyHU7iLE5kx2HVd57_C6XA,8850
 fractal_server/app/runner/executors/slurm_common/_job_states.py,sha256=nuV-Zba38kDrRESOVB3gaGbrSPZc4q7YGichQaeqTW0,238
 fractal_server/app/runner/executors/slurm_common/_slurm_config.py,sha256=fZaFUUXqDH0p3DndCFUpFqTqyD2tMVCuSYgYLAycpVw,15897
-fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py,sha256=
+fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py,sha256=f01IDAtBoatOYEP4UtrH0Y4qN7BwM1ov4Bx8rotQg1M,31099
 fractal_server/app/runner/executors/slurm_common/get_slurm_config.py,sha256=-fAX1DZMB5RZnyYanIJD72mWOJAPkh21jd4loDXKJw4,5994
 fractal_server/app/runner/executors/slurm_common/remote.py,sha256=FS_F8EaPp-A5eQT5_ZH3ICCHt0-C8b_2OSYcyRkXnb4,5851
 fractal_server/app/runner/executors/slurm_common/slurm_job_task_models.py,sha256=YGgzTspkK9ItSMzwuYv_1tY7_1g89Qpeny5Auinxk1E,2708
@@ -98,10 +98,10 @@ fractal_server/app/runner/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
 fractal_server/app/runner/v2/_local.py,sha256=DK8yagbvd6HHjcDVhUzTy0f7MURlTkQha-NM6OZKgJc,3044
 fractal_server/app/runner/v2/_slurm_ssh.py,sha256=_bytOf8z9sdrhI03D6eqg-aQPnJ7V2-qnqpcHAYizns,3278
 fractal_server/app/runner/v2/_slurm_sudo.py,sha256=DBCNxifXmMkpu71Wnk5u9-wKT7PV1WROQuY_4DYoZRI,2993
-fractal_server/app/runner/v2/db_tools.py,sha256=
+fractal_server/app/runner/v2/db_tools.py,sha256=KoPpbUtY5a1M4__Pp4luN8_TkpdkaUZXH_lkNoXHu4s,3394
 fractal_server/app/runner/v2/deduplicate_list.py,sha256=IVTE4abBU1bUprFTkxrTfYKnvkNTanWQ-KWh_etiT08,645
 fractal_server/app/runner/v2/merge_outputs.py,sha256=D1L4Taieq9i71SPQyNc1kMokgHh-sV_MqF3bv7QMDBc,907
-fractal_server/app/runner/v2/runner.py,sha256=
+fractal_server/app/runner/v2/runner.py,sha256=4Wrtx0HjjWD92yuIHqIxywPv8DeDh4iR9dZHXozIysQ,15781
 fractal_server/app/runner/v2/runner_functions.py,sha256=5cK5O2rTrCsCxMTVN3iNPRwZ_891BC9_RMo64a8ZGYw,16338
 fractal_server/app/runner/v2/runner_functions_low_level.py,sha256=9t1CHN3EyfsGRWfG257YPY5WjQ6zuztsw_KZrpEAFPo,3703
 fractal_server/app/runner/v2/submit_workflow.py,sha256=EDUyUuIPwZHb2zm7SCRRoFsGq2cN-b5OKw6CYkZ8kWk,13048
@@ -207,8 +207,8 @@ fractal_server/tasks/v2/utils_templates.py,sha256=Kc_nSzdlV6KIsO0CQSPs1w70zLyENP
 fractal_server/urls.py,sha256=QjIKAC1a46bCdiPMu3AlpgFbcv6a4l3ABcd5xz190Og,471
 fractal_server/utils.py,sha256=PMwrxWFxRTQRl1b9h-NRIbFGPKqpH_hXnkAT3NfZdpY,3571
 fractal_server/zip_tools.py,sha256=GjDgo_sf6V_DDg6wWeBlZu5zypIxycn_l257p_YVKGc,4876
-fractal_server-2.14.
-fractal_server-2.14.
-fractal_server-2.14.
-fractal_server-2.14.
-fractal_server-2.14.
+fractal_server-2.14.0a21.dist-info/LICENSE,sha256=QKAharUuhxL58kSoLizKJeZE3mTCBnX6ucmz8W0lxlk,1576
+fractal_server-2.14.0a21.dist-info/METADATA,sha256=OZesW99axIUpT41CcJOkIszG_F5jLrDfKLT29EctIgI,4563
+fractal_server-2.14.0a21.dist-info/WHEEL,sha256=7dDg4QLnNKTvwIDR9Ac8jJaAmBC_owJrckbC0jjThyA,88
+fractal_server-2.14.0a21.dist-info/entry_points.txt,sha256=8tV2kynvFkjnhbtDnxAqImL6HMVKsopgGfew0DOp5UY,58
+fractal_server-2.14.0a21.dist-info/RECORD,,
{fractal_server-2.14.0a19.dist-info → fractal_server-2.14.0a21.dist-info}/LICENSE
File without changes
{fractal_server-2.14.0a19.dist-info → fractal_server-2.14.0a21.dist-info}/WHEEL
File without changes
{fractal_server-2.14.0a19.dist-info → fractal_server-2.14.0a21.dist-info}/entry_points.txt
File without changes