fractal-server 2.14.0a10__py3-none-any.whl → 2.14.0a11__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- fractal_server/__init__.py +1 -1
- fractal_server/app/routes/api/v2/submit.py +1 -1
- fractal_server/app/runner/components.py +0 -3
- fractal_server/app/runner/exceptions.py +4 -0
- fractal_server/app/runner/executors/base_runner.py +16 -17
- fractal_server/app/runner/executors/local/{_local_config.py → get_local_config.py} +0 -7
- fractal_server/app/runner/executors/local/runner.py +117 -58
- fractal_server/app/runner/executors/slurm_common/_check_jobs_status.py +4 -0
- fractal_server/app/runner/executors/slurm_ssh/executor.py +7 -5
- fractal_server/app/runner/executors/slurm_ssh/runner.py +6 -10
- fractal_server/app/runner/executors/slurm_sudo/runner.py +201 -96
- fractal_server/app/runner/task_files.py +8 -0
- fractal_server/app/runner/v2/__init__.py +0 -366
- fractal_server/app/runner/v2/_local.py +2 -2
- fractal_server/app/runner/v2/_slurm_ssh.py +2 -2
- fractal_server/app/runner/v2/_slurm_sudo.py +2 -2
- fractal_server/app/runner/v2/db_tools.py +87 -0
- fractal_server/app/runner/v2/runner.py +77 -81
- fractal_server/app/runner/v2/runner_functions.py +274 -436
- fractal_server/app/runner/v2/runner_functions_low_level.py +37 -39
- fractal_server/app/runner/v2/submit_workflow.py +366 -0
- fractal_server/app/runner/v2/task_interface.py +31 -0
- {fractal_server-2.14.0a10.dist-info → fractal_server-2.14.0a11.dist-info}/METADATA +1 -1
- {fractal_server-2.14.0a10.dist-info → fractal_server-2.14.0a11.dist-info}/RECORD +27 -28
- fractal_server/app/runner/executors/local/_submit_setup.py +0 -46
- fractal_server/app/runner/executors/slurm_common/_submit_setup.py +0 -84
- fractal_server/app/runner/v2/_db_tools.py +0 -48
- {fractal_server-2.14.0a10.dist-info → fractal_server-2.14.0a11.dist-info}/LICENSE +0 -0
- {fractal_server-2.14.0a10.dist-info → fractal_server-2.14.0a11.dist-info}/WHEEL +0 -0
- {fractal_server-2.14.0a10.dist-info → fractal_server-2.14.0a11.dist-info}/entry_points.txt +0 -0
fractal_server/app/runner/executors/slurm_sudo/runner.py

@@ -9,19 +9,19 @@ import time
 from copy import copy
 from pathlib import Path
 from typing import Any
+from typing import Literal
 from typing import Optional
 
 import cloudpickle
 from pydantic import BaseModel
 from pydantic import ConfigDict
 
-from ..slurm_common._check_jobs_status import (
-    …
-)
+from ..slurm_common._check_jobs_status import get_finished_jobs
+from ..slurm_common._check_jobs_status import run_squeue
 from ._subprocess_run_as_user import _mkdir_as_user
 from ._subprocess_run_as_user import _run_command_as_user
 from fractal_server import __VERSION__
-from fractal_server.app.…
+from fractal_server.app.db import get_sync_db
 from fractal_server.app.runner.exceptions import JobExecutionError
 from fractal_server.app.runner.exceptions import TaskExecutionError
 from fractal_server.app.runner.executors.base_runner import BaseRunner
@@ -33,7 +33,8 @@ from fractal_server.app.runner.executors.slurm_common._slurm_config import (
 )
 from fractal_server.app.runner.filenames import SHUTDOWN_FILENAME
 from fractal_server.app.runner.task_files import TaskFiles
-from fractal_server.app.…
+from fractal_server.app.runner.v2.db_tools import update_status_of_history_unit
+from fractal_server.app.schemas.v2 import HistoryUnitStatus
 from fractal_server.config import get_settings
 from fractal_server.logger import set_logger
 from fractal_server.syringe import Inject
@@ -100,48 +101,68 @@ class SlurmJob(BaseModel):
     tasks: list[SlurmTask]
 
     @property
-    def …
+    def slurm_submission_script_local(self) -> str:
+        return (
+            self.workdir_local / f"slurm-{self.label}-submit.sh"
+        ).as_posix()
+
+    @property
+    def slurm_submission_script_remote(self) -> str:
+        return (
+            self.workdir_remote / f"slurm-{self.label}-submit.sh"
+        ).as_posix()
+
+    @property
+    def slurm_stdout_remote(self) -> str:
         if self.slurm_job_id:
             return (
-                self.…
-                / f"slurm-{self.label}-{self.slurm_job_id}.…
+                self.workdir_remote
+                / f"slurm-{self.label}-{self.slurm_job_id}.out"
             ).as_posix()
+
         else:
             return (
-                self.…
+                self.workdir_remote / f"slurm-{self.label}-%j.out"
             ).as_posix()
 
     @property
-    def …
+    def slurm_stderr_remote(self) -> str:
         if self.slurm_job_id:
             return (
                 self.workdir_remote
-                / f"slurm-{self.label}-{self.slurm_job_id}.…
+                / f"slurm-{self.label}-{self.slurm_job_id}.err"
             ).as_posix()
+
         else:
             return (
-                self.workdir_remote / f"slurm-{self.label}-%j.…
+                self.workdir_remote / f"slurm-{self.label}-%j.err"
             ).as_posix()
 
     @property
-    def …
-        …
-        …
-        …
+    def slurm_stdout_local(self) -> str:
+        if self.slurm_job_id:
+            return (
+                self.workdir_local
+                / f"slurm-{self.label}-{self.slurm_job_id}.out"
+            ).as_posix()
 
-        …
-        …
-        …
-        …
-        ).as_posix()
+        else:
+            return (
+                self.workdir_local / f"slurm-{self.label}-%j.out"
+            ).as_posix()
 
     @property
-    def …
-        …
+    def slurm_stderr_local(self) -> str:
+        if self.slurm_job_id:
+            return (
+                self.workdir_local
+                / f"slurm-{self.label}-{self.slurm_job_id}.err"
+            ).as_posix()
 
-        …
-        …
-        …
+        else:
+            return (
+                self.workdir_local / f"slurm-{self.label}-%j.err"
+            ).as_posix()
 
     @property
     def log_files_local(self) -> list[str]:
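The four new `slurm_stdout_*`/`slurm_stderr_*` properties encode one convention: while `slurm_job_id` is unset, the filename uses SLURM's `%j` placeholder (suitable for `#SBATCH` directives, where the scheduler expands it to the job id); once the id is known, the concrete path points at the actual file to retrieve. A minimal standalone sketch of that pattern (illustrative code, not the package's):

from pathlib import Path
from typing import Optional


def stdout_path(workdir: Path, label: str, slurm_job_id: Optional[str]) -> str:
    if slurm_job_id:
        # Concrete path, usable after submission (e.g. for retrieval).
        return (workdir / f"slurm-{label}-{slurm_job_id}.out").as_posix()
    else:
        # "%j" is substituted by SLURM itself in "#SBATCH" directives.
        return (workdir / f"slurm-{label}-%j.out").as_posix()


print(stdout_path(Path("/tmp/wd"), "batch0", None))    # /tmp/wd/slurm-batch0-%j.out
print(stdout_path(Path("/tmp/wd"), "batch0", "1234"))  # /tmp/wd/slurm-batch0-1234.out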
@@ -287,6 +308,7 @@ class RunnerSlurmSudo(BaseRunner):
         slurm_job: SlurmJob,
         slurm_config: SlurmConfig,
     ) -> str:
+        logger.debug("[_submit_single_sbatch] START")
         # Prepare input pickle(s)
         versions = dict(
             python=sys.version_info[:3],
@@ -295,10 +317,17 @@ class RunnerSlurmSudo(BaseRunner):
         )
         for task in slurm_job.tasks:
             _args = []
-            _kwargs = dict(…
+            _kwargs = dict(
+                parameters=task.parameters,
+                remote_files=task.task_files.remote_files_dict,
+            )
             funcser = cloudpickle.dumps((versions, func, _args, _kwargs))
             with open(task.input_pickle_file_local, "wb") as f:
                 f.write(funcser)
+            logger.debug(
+                "[_submit_single_sbatch] Written "
+                f"{task.input_pickle_file_local=}"
+            )
         # Prepare commands to be included in SLURM submission script
         settings = Inject(get_settings)
         python_worker_interpreter = (
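For each task, the runner now pickles a `(versions, func, _args, _kwargs)` tuple, where `_kwargs` carries the task parameters plus the new `remote_files` mapping. A minimal sketch of the round trip, assuming the remote worker simply unpickles and calls the function (the worker code is not part of this diff, and the names below are illustrative):

import sys

import cloudpickle


def example_task(parameters: dict, remote_files: dict) -> dict:
    # Stand-in for the user function submitted via `func`.
    return {"n_parameters": len(parameters), "log": remote_files.get("log_file_remote")}


versions = dict(python=sys.version_info[:3], cloudpickle=cloudpickle.__version__)
_args = []
_kwargs = dict(
    parameters={"zarr_url": "/somewhere.zarr"},
    remote_files={"log_file_remote": "/remote/log.txt"},
)
funcser = cloudpickle.dumps((versions, example_task, _args, _kwargs))

# Worker side: deserialize, optionally compare versions, then invoke.
versions_in, func, args, kwargs = cloudpickle.loads(funcser)
assert versions_in["python"] == sys.version_info[:3]
print(func(*args, **kwargs))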
@@ -335,8 +364,8 @@ class RunnerSlurmSudo(BaseRunner):
         # fix their order
         script_lines.extend(
             [
-                f"#SBATCH --…
-                f"#SBATCH --…
+                f"#SBATCH --out={slurm_job.slurm_stdout_remote}",
+                f"#SBATCH --err={slurm_job.slurm_stderr_remote}",
                 f"#SBATCH -D {slurm_job.workdir_remote}",
             ]
         )
@@ -394,8 +423,10 @@ class RunnerSlurmSudo(BaseRunner):
         """
         Note: this would differ for SSH
         """
+        logger.debug(f"[_copy_files_from_remote_to_local] {job.slurm_job_id=}")
         source_target_list = [
-            (job.…
+            (job.slurm_stdout_remote, job.slurm_stdout_local),
+            (job.slurm_stderr_remote, job.slurm_stderr_local),
         ]
         for task in job.tasks:
             source_target_list.extend(
@@ -463,21 +494,22 @@ class RunnerSlurmSudo(BaseRunner):
         self,
         func: callable,
         parameters: dict[str, Any],
-        …
+        history_unit_id: int,
         task_files: TaskFiles,
-        …
-        …
+        task_type: Literal[
+            "non_parallel",
+            "converter_non_parallel",
+            "compound",
+            "converter_compound",
+        ],
+        config: SlurmConfig,
     ) -> tuple[Any, Exception]:
-        workdir_local = task_files.wftask_subfolder_local
-        workdir_remote = task_files.wftask_subfolder_remote
 
-        …
-        …
-            exclude={"component"},
-        ),
-        component=parameters[_COMPONENT_KEY_],
-        )
+        if len(self.jobs) > 0:
+            raise RuntimeError(f"Cannot run .submit when {len(self.jobs)=}")
 
+        workdir_local = task_files.wftask_subfolder_local
+        workdir_remote = task_files.wftask_subfolder_remote
         if self.jobs != {}:
             raise JobExecutionError("Unexpected branch: jobs should be empty.")
 
@@ -485,7 +517,7 @@ class RunnerSlurmSudo(BaseRunner):
             raise JobExecutionError("Cannot continue after shutdown.")
 
         # Validation phase
-        self.validate_submit_parameters(parameters)
+        self.validate_submit_parameters(parameters, task_type=task_type)
 
         # Create task subfolder
         original_umask = os.umask(0)
@@ -504,7 +536,7 @@ class RunnerSlurmSudo(BaseRunner):
             tasks=[
                 SlurmTask(
                     index=0,
-                    component=…
+                    component=task_files.component,
                     parameters=parameters,
                     workdir_remote=workdir_remote,
                     workdir_local=workdir_local,
@@ -513,26 +545,56 @@ class RunnerSlurmSudo(BaseRunner):
             ],
         )  # TODO: replace with actual values (BASED ON TASKFILES)
 
-        …
+        config.parallel_tasks_per_job = 1
         self._submit_single_sbatch(
             func,
             slurm_job=slurm_job,
-            slurm_config=…
+            slurm_config=config,
         )
-        …
-        …
+        logger.debug("END SUBMISSION PHASE")
+        logger.debug(f"{self.jobs=}")
+        logger.debug(f"{self.job_ids=}")
+
+        # FIXME
+        jobs_that_started = set()
+        while len(jobs_that_started) != len(self.job_ids):
+            logger.debug("CALL SQUEUE")
+            res = run_squeue(self.job_ids)
+            new_jobs = set(out.split()[0] for out in res.stdout.splitlines())
+            jobs_that_started = jobs_that_started.union(new_jobs)
+            logger.debug(f"{new_jobs=}")
+            logger.debug(f"{len(jobs_that_started)=}")
+
+        logger.debug("START RETRIEVAL PHASE")
 
         # Retrieval phase
         while len(self.jobs) > 0:
             if self.is_shutdown():
                 self.scancel_jobs()
             finished_job_ids = get_finished_jobs(job_ids=self.job_ids)
-            …
-            …
-            …
-            …
-            …
-            …
+            logger.debug(f"{finished_job_ids=}")
+            with next(get_sync_db()) as db:
+                for slurm_job_id in finished_job_ids:
+                    logger.debug(f"Now process {slurm_job_id=}")
+                    slurm_job = self.jobs.pop(slurm_job_id)
+                    self._copy_files_from_remote_to_local(slurm_job)
+                    result, exception = self._postprocess_single_task(
+                        task=slurm_job.tasks[0]
+                    )
+                    if result is not None:
+                        if task_type not in ["compound", "converter_compound"]:
+                            update_status_of_history_unit(
+                                history_unit_id=history_unit_id,
+                                status=HistoryUnitStatus.DONE,
+                                db_sync=db,
+                            )
+                    if exception is not None:
+                        update_status_of_history_unit(
+                            history_unit_id=history_unit_id,
+                            status=HistoryUnitStatus.FAILED,
+                            db_sync=db,
+                        )
+
             time.sleep(self.slurm_poll_interval)
 
         return result, exception
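The block marked `# FIXME` polls squeue until every submitted job id has been seen at least once, guarding against the window right after sbatch in which the controller may not yet report the job. A hedged, standalone sketch of the same idea using plain `subprocess` instead of the package's `run_squeue` helper (filtering ids in Python, since squeue can error on ids it does not know yet):

import subprocess
import time


def wait_until_squeue_sees(job_ids: set[str], poll_interval: float = 1.0) -> None:
    # Loop until every submitted job id has shown up in squeue output.
    jobs_that_started: set[str] = set()
    while jobs_that_started != job_ids:
        res = subprocess.run(
            ["squeue", "--noheader", "--format=%i"],
            capture_output=True,
            text=True,
        )
        visible = {line.split()[0] for line in res.stdout.splitlines() if line.strip()}
        jobs_that_started |= job_ids & visible
        time.sleep(poll_interval)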
@@ -541,19 +603,38 @@ class RunnerSlurmSudo(BaseRunner):
         self,
         func: callable,
         list_parameters: list[dict],
-        …
-        …
-        …
-        …
+        history_unit_ids: list[int],
+        list_task_files: list[TaskFiles],
+        task_type: Literal["parallel", "compound", "converter_compound"],
+        config: SlurmConfig,
     ):
-        …
+
+        if len(self.jobs) > 0:
+            raise RuntimeError(f"Cannot run .submit when {len(self.jobs)=}")
+
+        if task_type in ["compound", "converter_compound"]:
+            if len(history_unit_ids) != 1:
+                raise NotImplementedError(
+                    "We are breaking the assumption that compound/multisubmit "
+                    "is associated to a single HistoryUnit. This is not "
+                    "supported."
+                )
+        elif task_type == "parallel" and len(history_unit_ids) != len(
+            list_parameters
+        ):
+            raise ValueError(
+                f"{len(history_unit_ids)=} differs from "
+                f"{len(list_parameters)=}."
+            )
 
         self.validate_multisubmit_parameters(
-            list_parameters=list_parameters,
+            list_parameters=list_parameters,
+            task_type=task_type,
+            list_task_files=list_task_files,
         )
 
-        workdir_local = …
-        workdir_remote = …
+        workdir_local = list_task_files[0].wftask_subfolder_local
+        workdir_remote = list_task_files[0].wftask_subfolder_remote
 
         # Create local&remote task subfolders
         if task_type not in ["converter_compound", "compound"]:
@@ -571,7 +652,7 @@ class RunnerSlurmSudo(BaseRunner):
         results: dict[int, Any] = {}
         exceptions: dict[int, BaseException] = {}
 
-        original_task_files = …
+        original_task_files = list_task_files
         tot_tasks = len(list_parameters)
 
         # Set/validate parameters for task batching
@@ -579,21 +660,21 @@ class RunnerSlurmSudo(BaseRunner):
             # Number of parallel components (always known)
             tot_tasks=tot_tasks,
             # Optional WorkflowTask attributes:
-            tasks_per_job=…
-            parallel_tasks_per_job=…
+            tasks_per_job=config.tasks_per_job,
+            parallel_tasks_per_job=config.parallel_tasks_per_job,  # noqa
             # Task requirements (multiple possible sources):
-            cpus_per_task=…
-            mem_per_task=…
+            cpus_per_task=config.cpus_per_task,
+            mem_per_task=config.mem_per_task_MB,
             # Fractal configuration variables (soft/hard limits):
-            target_cpus_per_job=…
-            target_mem_per_job=…
-            target_num_jobs=…
-            max_cpus_per_job=…
-            max_mem_per_job=…
-            max_num_jobs=…
+            target_cpus_per_job=config.target_cpus_per_job,
+            target_mem_per_job=config.target_mem_per_job,
+            target_num_jobs=config.target_num_jobs,
+            max_cpus_per_job=config.max_cpus_per_job,
+            max_mem_per_job=config.max_mem_per_job,
+            max_num_jobs=config.max_num_jobs,
         )
-        …
-        …
+        config.parallel_tasks_per_job = parallel_tasks_per_job
+        config.tasks_per_job = tasks_per_job
 
         # Divide arguments in batches of `tasks_per_job` tasks each
         args_batches = []
@@ -607,24 +688,18 @@ class RunnerSlurmSudo(BaseRunner):
 
         logger.info(f"START submission phase, {list(self.jobs.keys())=}")
         for ind_batch, chunk in enumerate(args_batches):
-            # TODO: replace with actual values
             tasks = []
             for ind_chunk, parameters in enumerate(chunk):
-                …
+                index = (ind_batch * batch_size) + ind_chunk
                 tasks.append(
                     SlurmTask(
-                        index=…
-                        component=component,
+                        index=index,
+                        component=original_task_files[index].component,
                         workdir_local=workdir_local,
                         workdir_remote=workdir_remote,
                         parameters=parameters,
                        zarr_url=parameters["zarr_url"],
-                        task_files=…
-                            **original_task_files.model_dump(
-                                exclude={"component"}
-                            ),
-                            component=component,
-                        ),
+                        task_files=original_task_files[index],
                     ),
                 )
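The `index = (ind_batch * batch_size) + ind_chunk` arithmetic recovers each task's position in the original `list_parameters`, which is what lets a batched task be matched back to `original_task_files[index]` (and, in the parallel case, to `history_unit_ids[index]`). A self-contained check of that batching logic:

# Toy data: 7 parameter dicts split into batches of 3 (i.e. tasks_per_job=3).
list_parameters = [{"zarr_url": f"/zarr/{i}"} for i in range(7)]
batch_size = 3

args_batches = [
    list_parameters[i : i + batch_size]
    for i in range(0, len(list_parameters), batch_size)
]

for ind_batch, chunk in enumerate(args_batches):
    for ind_chunk, parameters in enumerate(chunk):
        # Flat index into the original list, as in the hunk above.
        index = (ind_batch * batch_size) + ind_chunk
        assert list_parameters[index] is parameters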
@@ -637,26 +712,56 @@ class RunnerSlurmSudo(BaseRunner):
             self._submit_single_sbatch(
                 func,
                 slurm_job=slurm_job,
-                slurm_config=…
+                slurm_config=config,
             )
         logger.info(f"END submission phase, {list(self.jobs.keys())=}")
 
+        # FIXME
+        jobs_that_started = set()
+        while len(jobs_that_started) != len(self.job_ids):
+            res = run_squeue(self.job_ids)
+            new_jobs = set(out.split()[0] for out in res.stdout.splitlines())
+            jobs_that_started = jobs_that_started.union(new_jobs)
+            logger.debug(f"{new_jobs=}")
+            logger.debug(f"{len(jobs_that_started)=}")
+
         # Retrieval phase
         while len(self.jobs) > 0:
             if self.is_shutdown():
                 self.scancel_jobs()
             finished_job_ids = get_finished_jobs(job_ids=self.job_ids)
-            …
-            …
-            …
-            …
-            …
-            …
-            …
-            …
-            …
-            …
-            …
+            logger.debug(f"{finished_job_ids=}")
+            with next(get_sync_db()) as db:
+                for slurm_job_id in finished_job_ids:
+                    logger.debug(f"Now processing {slurm_job_id=}")
+                    slurm_job = self.jobs.pop(slurm_job_id)
+                    self._copy_files_from_remote_to_local(slurm_job)
+                    for task in slurm_job.tasks:
+                        result, exception = self._postprocess_single_task(
+                            task=task
+                        )
+
+                        if result is not None:
+                            results[task.index] = result
+                            if task_type == "parallel":
+                                update_status_of_history_unit(
+                                    history_unit_id=history_unit_ids[
+                                        task.index
+                                    ],
+                                    status=HistoryUnitStatus.DONE,
+                                    db_sync=db,
+                                )
+                        if exception is not None:
+                            exceptions[task.index] = exception
+                            if task_type == "parallel":
+                                update_status_of_history_unit(
+                                    history_unit_id=history_unit_ids[
+                                        task.index
+                                    ],
+                                    status=HistoryUnitStatus.FAILED,
+                                    db_sync=db,
+                                )
+
             time.sleep(self.slurm_poll_interval)
         return results, exceptions
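Both retrieval loops open a database session via `with next(get_sync_db()) as db:`. Assuming `get_sync_db` follows the usual FastAPI-style pattern of a generator yielding a session that is also a context manager, `next(...)` pulls the single yielded session and the `with` block closes it at the end of each polling iteration. A toy reproduction of the pattern:

from collections.abc import Iterator


class FakeSession:
    # Stand-in for a SQLAlchemy/SQLModel session, which is a context manager.
    closed = False

    def __enter__(self) -> "FakeSession":
        return self

    def __exit__(self, *exc) -> None:
        self.close()

    def close(self) -> None:
        if not self.closed:
            self.closed = True
            print("session closed")


def get_sync_db() -> Iterator[FakeSession]:
    session = FakeSession()
    try:
        yield session
    finally:
        session.close()


with next(get_sync_db()) as db:
    print(f"processing finished jobs with {db!r}")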
fractal_server/app/runner/task_files.py

@@ -96,3 +96,11 @@ class TaskFiles(BaseModel):
         return (
             self.wftask_subfolder_remote / f"{self.component}-metadiff.json"
         ).as_posix()
+
+    @property
+    def remote_files_dict(self) -> dict[str, str]:
+        return dict(
+            args_file_remote=self.args_file_remote,
+            metadiff_file_remote=self.metadiff_file_remote,
+            log_file_remote=self.log_file_remote,
+        )
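The new `remote_files_dict` property bundles the three remote paths that `_submit_single_sbatch` now forwards to the worker as `remote_files`. A runnable sketch of a `TaskFiles`-like model; only the metadiff filename pattern is visible in this hunk, so the args/log patterns below are assumptions:

from pathlib import Path

from pydantic import BaseModel


class TaskFilesSketch(BaseModel):
    wftask_subfolder_remote: Path
    component: str

    @property
    def args_file_remote(self) -> str:
        # Assumed filename pattern (not shown in this diff).
        return (self.wftask_subfolder_remote / f"{self.component}-args.json").as_posix()

    @property
    def log_file_remote(self) -> str:
        # Assumed filename pattern (not shown in this diff).
        return (self.wftask_subfolder_remote / f"{self.component}-log.txt").as_posix()

    @property
    def metadiff_file_remote(self) -> str:
        # Pattern taken from the hunk above.
        return (
            self.wftask_subfolder_remote / f"{self.component}-metadiff.json"
        ).as_posix()

    @property
    def remote_files_dict(self) -> dict[str, str]:
        return dict(
            args_file_remote=self.args_file_remote,
            metadiff_file_remote=self.metadiff_file_remote,
            log_file_remote=self.log_file_remote,
        )


tf = TaskFilesSketch(wftask_subfolder_remote=Path("/remote/sub"), component="0")
print(tf.remote_files_dict)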