fractal-server 2.14.0a32__py3-none-any.whl → 2.14.0a34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/runner/executors/local/runner.py +95 -35
- fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py +283 -237
- fractal_server/app/runner/executors/slurm_ssh/runner.py +60 -0
- fractal_server/app/runner/executors/slurm_sudo/runner.py +13 -1
- fractal_server/app/runner/v2/runner_functions.py +12 -1
- fractal_server/ssh/_fabric.py +24 -12
- {fractal_server-2.14.0a32.dist-info → fractal_server-2.14.0a34.dist-info}/METADATA +1 -1
- {fractal_server-2.14.0a32.dist-info → fractal_server-2.14.0a34.dist-info}/RECORD +12 -12
- {fractal_server-2.14.0a32.dist-info → fractal_server-2.14.0a34.dist-info}/LICENSE +0 -0
- {fractal_server-2.14.0a32.dist-info → fractal_server-2.14.0a34.dist-info}/WHEEL +0 -0
- {fractal_server-2.14.0a32.dist-info → fractal_server-2.14.0a34.dist-info}/entry_points.txt +0 -0
fractal_server/__init__.py
CHANGED
@@ -1 +1 @@
-__VERSION__ = "2.14.0a32"
+__VERSION__ = "2.14.0a34"
fractal_server/app/runner/executors/local/runner.py
CHANGED
@@ -9,6 +9,9 @@ from fractal_server.app.db import get_sync_db
 from fractal_server.app.runner.exceptions import TaskExecutionError
 from fractal_server.app.runner.executors.base_runner import BaseRunner
 from fractal_server.app.runner.task_files import TaskFiles
+from fractal_server.app.runner.v2.db_tools import (
+    bulk_update_status_of_history_unit,
+)
 from fractal_server.app.runner.v2.db_tools import update_status_of_history_unit
 from fractal_server.app.schemas.v2 import HistoryUnitStatus
 from fractal_server.logger import set_logger
@@ -58,16 +61,31 @@ class LocalRunner(BaseRunner):
     ) -> tuple[Any, Exception]:
         logger.debug("[submit] START")
 
-        self.validate_submit_parameters(parameters, task_type=task_type)
-        workdir_local = task_files.wftask_subfolder_local
-        workdir_local.mkdir()
+        try:
+            self.validate_submit_parameters(parameters, task_type=task_type)
+            workdir_local = task_files.wftask_subfolder_local
+            workdir_local.mkdir()
 
-        # SUBMISSION PHASE
-        future = self.executor.submit(
-            func,
-            parameters=parameters,
-            remote_files=task_files.remote_files_dict,
-        )
+            # SUBMISSION PHASE
+            future = self.executor.submit(
+                func,
+                parameters=parameters,
+                remote_files=task_files.remote_files_dict,
+            )
+        except Exception as e:
+            logger.error(
+                "[submit] Unexpected exception during submission. "
+                f"Original error {str(e)}"
+            )
+            result = None
+            exception = TaskExecutionError(str(e))
+            with next(get_sync_db()) as db:
+                update_status_of_history_unit(
+                    history_unit_id=history_unit_id,
+                    status=HistoryUnitStatus.FAILED,
+                    db_sync=db,
+                )
+            return None, exception
 
         # RETRIEVAL PHASE
         with next(get_sync_db()) as db:
@@ -105,29 +123,50 @@
         input images, while for compound tasks these can differ.
         """
 
-        self.validate_multisubmit_parameters(
-            list_parameters=list_parameters,
-            task_type=task_type,
-            list_task_files=list_task_files,
-            history_unit_ids=history_unit_ids,
-        )
-
         logger.debug(f"[multisubmit] START, {len(list_parameters)=}")
+        results: dict[int, Any] = {}
+        exceptions: dict[int, BaseException] = {}
 
-        workdir_local = list_task_files[0].wftask_subfolder_local
-        if task_type == "parallel":
-            workdir_local.mkdir()
-
-        # Set `n_elements` and `parallel_tasks_per_job`
-        n_elements = len(list_parameters)
-        parallel_tasks_per_job = config.parallel_tasks_per_job
-        if parallel_tasks_per_job is None:
-            parallel_tasks_per_job = n_elements
+        try:
+
+            self.validate_multisubmit_parameters(
+                list_parameters=list_parameters,
+                task_type=task_type,
+                list_task_files=list_task_files,
+                history_unit_ids=history_unit_ids,
+            )
+
+            workdir_local = list_task_files[0].wftask_subfolder_local
+            if task_type == "parallel":
+                workdir_local.mkdir()
+
+            # Set `n_elements` and `parallel_tasks_per_job`
+            n_elements = len(list_parameters)
+            parallel_tasks_per_job = config.parallel_tasks_per_job
+            if parallel_tasks_per_job is None:
+                parallel_tasks_per_job = n_elements
+
+        except Exception as e:
+            logger.error(
+                "[multisubmit] Unexpected exception during preliminary phase. "
+                f"Original error {str(e)}"
+            )
+            exception = TaskExecutionError(str(e))
+            exceptions = {
+                ind: exception for ind in range(len(list_parameters))
+            }
+            if task_type == "parallel":
+                with next(get_sync_db()) as db:
+                    bulk_update_status_of_history_unit(
+                        history_unit_ids=history_unit_ids,
+                        status=HistoryUnitStatus.FAILED,
+                        db_sync=db,
+                    )
+            return results, exceptions
 
         # Execute tasks, in chunks of size `parallel_tasks_per_job`
-        results: dict[int, Any] = {}
-        exceptions: dict[int, BaseException] = {}
         for ind_chunk in range(0, n_elements, parallel_tasks_per_job):
+
             list_parameters_chunk = list_parameters[
                 ind_chunk : ind_chunk + parallel_tasks_per_job
             ]
@@ -135,15 +174,31 @@
             active_futures: dict[int, Future] = {}
             for ind_within_chunk, kwargs in enumerate(list_parameters_chunk):
                 positional_index = ind_chunk + ind_within_chunk
-                future = self.executor.submit(
-                    func,
-                    parameters=kwargs,
-                    remote_files=list_task_files[
+                try:
+                    future = self.executor.submit(
+                        func,
+                        parameters=kwargs,
+                        remote_files=list_task_files[
+                            positional_index
+                        ].remote_files_dict,
+                    )
+                    active_futures[positional_index] = future
+                except Exception as e:
+                    logger.error(
+                        "[multisubmit] Unexpected exception during submission."
+                        f" Original error {str(e)}"
+                    )
+                    current_history_unit_id = history_unit_ids[
                         positional_index
-                    ].remote_files_dict,
-                )
-                active_futures[positional_index] = future
-
+                    ]
+                    exceptions[positional_index] = TaskExecutionError(str(e))
+                    if task_type == "parallel":
+                        with next(get_sync_db()) as db:
+                            update_status_of_history_unit(
+                                history_unit_id=current_history_unit_id,
+                                status=HistoryUnitStatus.FAILED,
+                                db_sync=db,
+                            )
             while active_futures:
                 finished_futures = [
                     index_and_future
@@ -171,6 +226,11 @@
                         )
 
                     except Exception as e:
+                        logger.debug(
+                            "Multisubmit failed in retrieval "
+                            "phase with the following error "
+                            f"{str(e)}"
+                        )
                         exceptions[positional_index] = TaskExecutionError(
                             str(e)
                         )
fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py
CHANGED
@@ -100,65 +100,51 @@ class BaseSlurmRunner(BaseRunner):
     def __exit__(self, exc_type, exc_val, exc_tb):
         return False
 
-    def _run_local_cmd(self, cmd: str) -> str:
-        raise NotImplementedError("Implement in child class.")
-
     def _run_remote_cmd(self, cmd: str) -> str:
         raise NotImplementedError("Implement in child class.")
 
-    def run_squeue(self, job_ids: list[str]) ->
-
-
-        if len(job_ids) == 0:
-            return (False, "")
-
-        job_id_single_str = ",".join([str(j) for j in job_ids])
-        cmd = (
-            f"squeue --noheader --format='%i %T' --jobs {job_id_single_str}"
-            " --states=all"
-        )
-
-        try:
-            if self.slurm_runner_type == "sudo":
-                stdout = self._run_local_cmd(cmd)
-            else:
-                stdout = self._run_remote_cmd(cmd)
-            return (True, stdout)
-        except Exception as e:
-            logger.info(f"{cmd=} failed with {str(e)}")
-            return (False, "")
+    def run_squeue(self, *, job_ids: list[str], **kwargs) -> str:
+        raise NotImplementedError("Implement in child class.")
 
     def _get_finished_jobs(self, job_ids: list[str]) -> set[str]:
-        # If there is no Slurm job to check, return right away
 
+        # If there is no Slurm job to check, return right away
         if not job_ids:
            return set()
-        id_to_state = dict()
 
-
-
-
+        try:
+            stdout = self.run_squeue(job_ids=job_ids)
+            slurm_statuses = {
                 out.split()[0]: out.split()[1] for out in stdout.splitlines()
             }
-
-
-
-
-
-
-
-
-
-
+        except Exception as e:
+            logger.warning(
+                "[_get_finished_jobs] `squeue` failed, "
+                "retry with individual job IDs. "
+                f"Original error: {str(e)}."
+            )
+            slurm_statuses = dict()
+            for job_id in job_ids:
+                try:
+                    stdout = self.run_squeue(job_ids=[job_id])
+                    slurm_statuses.update(
+                        {stdout.split()[0]: stdout.split()[1]}
                     )
+                except Exception as e:
+                    logger.warning(
+                        "[_get_finished_jobs] `squeue` failed for "
+                        f"{job_id=}, mark job as completed. "
+                        f"Original error: {str(e)}."
+                    )
+                    slurm_statuses.update({str(job_id): "COMPLETED"})
 
-        #
-
-
-
-
-        if id_to_state.get(j, "COMPLETED") in STATES_FINISHED
+        # If a job is not in `squeue` output, mark it as completed.
+        finished_jobs = {
+            job_id
+            for job_id in job_ids
+            if slurm_statuses.get(job_id, "COMPLETED") in STATES_FINISHED
         }
+        return finished_jobs
 
     def _mkdir_local_folder(self, folder: str) -> None:
         raise NotImplementedError("Implement in child class.")
@@ -421,27 +407,34 @@ class BaseSlurmRunner(BaseRunner):
         """
         # Sleep for `self.poll_interval`, but keep checking for shutdowns
         start_time = time.perf_counter()
-
-
+        # Always wait at least 0.2 (note: this is for cases where
+        # `poll_interval=0`).
+        waiting_time = max(self.poll_interval, 0.2)
+        max_time = start_time + waiting_time
         logger.debug(
             "[wait_and_check_shutdown] "
             f"I will wait at most {self.poll_interval} s, "
            f"in blocks of {self.poll_interval_internal} s."
         )
 
-        while
-            # Handle shutdown
+        while time.perf_counter() < max_time:
            if self.is_shutdown():
                 logger.info("[wait_and_check_shutdown] Shutdown file detected")
                 scancelled_job_ids = self.scancel_jobs()
                 logger.info(f"[wait_and_check_shutdown] {scancelled_job_ids=}")
                 return scancelled_job_ids
-            can_return = True
             time.sleep(self.poll_interval_internal)
 
         logger.debug("[wait_and_check_shutdown] No shutdown file detected")
         return []
 
+    def _check_no_active_jobs(self):
+        if self.jobs != {}:
+            raise JobExecutionError(
+                "Unexpected branch: jobs must be empty before new "
+                "submissions."
+            )
+
     def submit(
         self,
         func: callable,
@@ -457,107 +450,125 @@ class BaseSlurmRunner(BaseRunner):
         ],
     ) -> tuple[Any, Exception]:
         logger.info("[submit] START")
+        try:
+            workdir_local = task_files.wftask_subfolder_local
+            workdir_remote = task_files.wftask_subfolder_remote
 
-
-
-
-
-
-
-
-            with next(get_sync_db()) as db:
-                update_status_of_history_unit(
-                    history_unit_id=history_unit_id,
-                    status=HistoryUnitStatus.FAILED,
-                    db_sync=db,
-                )
+            if self.is_shutdown():
+                with next(get_sync_db()) as db:
+                    update_status_of_history_unit(
+                        history_unit_id=history_unit_id,
+                        status=HistoryUnitStatus.FAILED,
+                        db_sync=db,
+                    )
 
-
+                return None, SHUTDOWN_EXCEPTION
 
-
-        self.validate_submit_parameters(
-            parameters=parameters,
-            task_type=task_type,
-        )
+            self._check_no_active_jobs()
 
-
-
-
-
-
-
-        # Submission phase
-        slurm_job = SlurmJob(
-            prefix=task_files.prefix,
-            workdir_local=workdir_local,
-            workdir_remote=workdir_remote,
-            tasks=[
-                SlurmTask(
-                    prefix=task_files.prefix,
-                    index=0,
-                    component=task_files.component,
-                    parameters=parameters,
-                    workdir_remote=workdir_remote,
-                    workdir_local=workdir_local,
-                    task_files=task_files,
-                )
-            ],
-        )
-
-        config.parallel_tasks_per_job = 1
-        self._submit_single_sbatch(
-            func,
-            slurm_job=slurm_job,
-            slurm_config=config,
-        )
-        logger.info(f"[submit] END submission phase, {self.job_ids=}")
-
-        # NOTE: see issue 2444
-        settings = Inject(get_settings)
-        sleep_time = settings.FRACTAL_SLURM_INTERVAL_BEFORE_RETRIEVAL
-        logger.warning(f"[submit] Now sleep {sleep_time} seconds.")
-        time.sleep(sleep_time)
+            # Validation phase
+            self.validate_submit_parameters(
+                parameters=parameters,
+                task_type=task_type,
+            )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # Create task subfolder
+            logger.info("[submit] Create local/remote folders - START")
+            self._mkdir_local_folder(folder=workdir_local.as_posix())
+            self._mkdir_remote_folder(folder=workdir_remote.as_posix())
+            logger.info("[submit] Create local/remote folders - END")
+
+            # Submission phase
+            slurm_job = SlurmJob(
+                prefix=task_files.prefix,
+                workdir_local=workdir_local,
+                workdir_remote=workdir_remote,
+                tasks=[
+                    SlurmTask(
+                        prefix=task_files.prefix,
+                        index=0,
+                        component=task_files.component,
+                        parameters=parameters,
+                        workdir_remote=workdir_remote,
+                        workdir_local=workdir_local,
+                        task_files=task_files,
                    )
+                ],
+            )
 
-
-
-
-
-
+            config.parallel_tasks_per_job = 1
+            self._submit_single_sbatch(
+                func,
+                slurm_job=slurm_job,
+                slurm_config=config,
+            )
+            logger.info(f"[submit] END submission phase, {self.job_ids=}")
+
+            # NOTE: see issue 2444
+            settings = Inject(get_settings)
+            sleep_time = settings.FRACTAL_SLURM_INTERVAL_BEFORE_RETRIEVAL
+            logger.warning(f"[submit] Now sleep {sleep_time} seconds.")
+            time.sleep(sleep_time)
+
+            # Retrieval phase
+            logger.info("[submit] START retrieval phase")
+            scancelled_job_ids = []
+            while len(self.jobs) > 0:
+                # Look for finished jobs
+                finished_job_ids = self._get_finished_jobs(
+                    job_ids=self.job_ids
+                )
+                logger.debug(f"[submit] {finished_job_ids=}")
+                finished_jobs = [
+                    self.jobs[_slurm_job_id]
+                    for _slurm_job_id in finished_job_ids
+                ]
+                self._fetch_artifacts(finished_jobs)
+                with next(get_sync_db()) as db:
+                    for slurm_job_id in finished_job_ids:
+                        logger.debug(f"[submit] Now process {slurm_job_id=}")
+                        slurm_job = self.jobs.pop(slurm_job_id)
+                        was_job_scancelled = slurm_job_id in scancelled_job_ids
+                        result, exception = self._postprocess_single_task(
+                            task=slurm_job.tasks[0],
+                            was_job_scancelled=was_job_scancelled,
                        )
-
-                        if
+
+                        if exception is not None:
                             update_status_of_history_unit(
                                 history_unit_id=history_unit_id,
-                status=HistoryUnitStatus.
+                                status=HistoryUnitStatus.FAILED,
                                 db_sync=db,
                             )
+                        else:
+                            if task_type not in [
+                                "compound",
+                                "converter_compound",
+                            ]:
+                                update_status_of_history_unit(
+                                    history_unit_id=history_unit_id,
+                                    status=HistoryUnitStatus.DONE,
+                                    db_sync=db,
+                                )
 
-
-
+                if len(self.jobs) > 0:
+                    scancelled_job_ids = self.wait_and_check_shutdown()
+
+            logger.info("[submit] END")
+            return result, exception
 
-
-
+        except Exception as e:
+            logger.error(
+                f"[submit] Unexpected exception. Original error: {str(e)}"
+            )
+            with next(get_sync_db()) as db:
+                update_status_of_history_unit(
+                    history_unit_id=history_unit_id,
+                    status=HistoryUnitStatus.FAILED,
+                    db_sync=db,
+                )
+            self.scancel_jobs()
+            return None, e
 
     def multisubmit(
         self,
@@ -574,108 +585,120 @@ class BaseSlurmRunner(BaseRunner):
         input images, while for compound tasks these can differ.
         """
 
-        if len(self.jobs) > 0:
-            raise RuntimeError(
-                f"Cannot run `multisubmit` when {len(self.jobs)=}"
-            )
-
-        if self.is_shutdown():
-            if task_type == "parallel":
-                with next(get_sync_db()) as db:
-                    bulk_update_status_of_history_unit(
-                        history_unit_ids=history_unit_ids,
-                        status=HistoryUnitStatus.FAILED,
-                        db_sync=db,
-                    )
-            results = {}
-            exceptions = {
-                ind: SHUTDOWN_EXCEPTION for ind in range(len(list_parameters))
-            }
-            return results, exceptions
-
-        self.validate_multisubmit_parameters(
-            list_parameters=list_parameters,
-            task_type=task_type,
-            list_task_files=list_task_files,
-            history_unit_ids=history_unit_ids,
-        )
-
         logger.info(f"[multisubmit] START, {len(list_parameters)=}")
+        try:
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            if self.is_shutdown():
+                if task_type == "parallel":
+                    with next(get_sync_db()) as db:
+                        bulk_update_status_of_history_unit(
+                            history_unit_ids=history_unit_ids,
+                            status=HistoryUnitStatus.FAILED,
+                            db_sync=db,
+                        )
+                results = {}
+                exceptions = {
+                    ind: SHUTDOWN_EXCEPTION
+                    for ind in range(len(list_parameters))
+                }
+                return results, exceptions
+
+            self._check_no_active_jobs()
+            self.validate_multisubmit_parameters(
+                list_parameters=list_parameters,
+                task_type=task_type,
+                list_task_files=list_task_files,
+                history_unit_ids=history_unit_ids,
+            )
 
-
-
+            workdir_local = list_task_files[0].wftask_subfolder_local
+            workdir_remote = list_task_files[0].wftask_subfolder_remote
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # Create local&remote task subfolders
+            if task_type == "parallel":
+                self._mkdir_local_folder(workdir_local.as_posix())
+                self._mkdir_remote_folder(folder=workdir_remote.as_posix())
+
+            results: dict[int, Any] = {}
+            exceptions: dict[int, BaseException] = {}
+
+            # NOTE: chunking has already taken place in `get_slurm_config`,
+            # so that `config.tasks_per_job` is now set.
+
+            # Divide arguments in batches of `tasks_per_job` tasks each
+            tot_tasks = len(list_parameters)
+            args_batches = []
+            batch_size = config.tasks_per_job
+            for ind_chunk in range(0, tot_tasks, batch_size):
+                args_batches.append(
+                    list_parameters[ind_chunk : ind_chunk + batch_size]  # noqa
+                )
+            if len(args_batches) != math.ceil(
+                tot_tasks / config.tasks_per_job
+            ):
+                raise RuntimeError("Something wrong here while batching tasks")
+
+            # Part 1/3: Iterate over chunks, prepare SlurmJob objects
+            logger.info("[multisubmit] Prepare `SlurmJob`s.")
+            jobs_to_submit = []
+            for ind_batch, chunk in enumerate(args_batches):
+                # Read prefix based on the first task of this batch
+                prefix = list_task_files[ind_batch * batch_size].prefix
+                tasks = []
+                for ind_chunk, parameters in enumerate(chunk):
+                    index = (ind_batch * batch_size) + ind_chunk
+                    tasks.append(
+                        SlurmTask(
+                            prefix=prefix,
+                            index=index,
+                            component=list_task_files[index].component,
+                            workdir_local=workdir_local,
+                            workdir_remote=workdir_remote,
+                            parameters=parameters,
+                            zarr_url=parameters["zarr_url"],
+                            task_files=list_task_files[index],
+                        ),
+                    )
+                jobs_to_submit.append(
+                    SlurmJob(
                        prefix=prefix,
-                        index=index,
-                        component=list_task_files[index].component,
                        workdir_local=workdir_local,
                        workdir_remote=workdir_remote,
-
-
-                        task_files=list_task_files[index],
-                    ),
-                )
-            jobs_to_submit.append(
-                SlurmJob(
-                    prefix=prefix,
-                    workdir_local=workdir_local,
-                    workdir_remote=workdir_remote,
-                    tasks=tasks,
+                        tasks=tasks,
+                    )
                )
-            )
 
-
-
-
-
-
-
-
-
+            # NOTE: see issue 2431
+            logger.info("[multisubmit] Transfer files and submit jobs.")
+            for slurm_job in jobs_to_submit:
+                self._submit_single_sbatch(
+                    func,
+                    slurm_job=slurm_job,
+                    slurm_config=config,
+                )
 
-
+            logger.info(f"END submission phase, {self.job_ids=}")
 
-
-
-
-
+            settings = Inject(get_settings)
+            sleep_time = settings.FRACTAL_SLURM_INTERVAL_BEFORE_RETRIEVAL
+            logger.warning(f"[submit] Now sleep {sleep_time} seconds.")
+            time.sleep(sleep_time)
+        except Exception as e:
+            logger.error(
+                "[multisubmit] Unexpected exception during submission."
+                f" Original error {str(e)}"
+            )
+            self.scancel_jobs()
+            if task_type == "parallel":
+                with next(get_sync_db()) as db:
+                    bulk_update_status_of_history_unit(
+                        history_unit_ids=history_unit_ids,
+                        status=HistoryUnitStatus.FAILED,
+                        db_sync=db,
+                    )
+            results = {}
+            exceptions = {ind: e for ind in range(len(list_parameters))}
+            return results, exceptions
 
         # Retrieval phase
         logger.info("[multisubmit] START retrieval phase")
@@ -687,7 +710,16 @@ class BaseSlurmRunner(BaseRunner):
             finished_jobs = [
                 self.jobs[_slurm_job_id] for _slurm_job_id in finished_job_ids
             ]
-            self._fetch_artifacts(finished_jobs)
+            fetch_artifacts_exception = None
+            try:
+                self._fetch_artifacts(finished_jobs)
+            except Exception as e:
+                logger.error(
+                    "[multisubmit] Unexpected exception in "
+                    "`_fetch_artifacts`. "
+                    f"Original error: {str(e)}"
+                )
+                fetch_artifacts_exception = e
 
             with next(get_sync_db()) as db:
                 for slurm_job_id in finished_job_ids:
@@ -696,11 +728,26 @@ class BaseSlurmRunner(BaseRunner):
                     for task in slurm_job.tasks:
                         logger.info(f"[multisubmit] Now process {task.index=}")
                         was_job_scancelled = slurm_job_id in scancelled_job_ids
-
-
-
-
-
+                        if fetch_artifacts_exception is not None:
+                            result = None
+                            exception = fetch_artifacts_exception
+                        else:
+                            try:
+                                (
+                                    result,
+                                    exception,
+                                ) = self._postprocess_single_task(
+                                    task=task,
+                                    was_job_scancelled=was_job_scancelled,
+                                )
+                            except Exception as e:
+                                logger.error(
+                                    "[multisubmit] Unexpected exception in "
+                                    "`_postprocess_single_task`. "
+                                    f"Original error: {str(e)}"
+                                )
+                                result = None
+                                exception = e
                         # Note: the relevant done/failed check is based on
                         # whether `exception is None`. The fact that
                         # `result is None` is not relevant for this purpose.
@@ -763,16 +810,15 @@ class BaseSlurmRunner(BaseRunner):
 
     def scancel_jobs(self) -> list[str]:
         logger.info("[scancel_jobs] START")
-
+        scancelled_job_ids = self.job_ids
         if self.jobs:
-            scancelled_job_ids = self.job_ids
             scancel_string = " ".join(scancelled_job_ids)
             scancel_cmd = f"scancel {scancel_string}"
             logger.warning(f"Now scancel-ing SLURM jobs {scancel_string}")
             try:
                 self._run_remote_cmd(scancel_cmd)
             except Exception as e:
-                logger.
+                logger.error(
                     "[scancel_jobs] `scancel` command failed. "
                     f"Original error:\n{str(e)}"
                 )
fractal_server/app/runner/executors/slurm_ssh/runner.py
CHANGED
@@ -9,6 +9,8 @@ from fractal_server.app.runner.extract_archive import extract_archive
 from fractal_server.config import get_settings
 from fractal_server.logger import set_logger
 from fractal_server.ssh._fabric import FractalSSH
+from fractal_server.ssh._fabric import FractalSSHCommandError
+from fractal_server.ssh._fabric import FractalSSHTimeoutError
 from fractal_server.syringe import Inject
 
 
@@ -206,3 +208,61 @@ class SlurmSSHRunner(BaseSlurmRunner):
     def _run_remote_cmd(self, cmd: str) -> str:
         stdout = self.fractal_ssh.run_command(cmd=cmd)
         return stdout
+
+    def run_squeue(
+        self,
+        *,
+        job_ids: list[str],
+        base_interval: float = 2.0,
+        max_attempts: int = 7,
+    ) -> str:
+        """
+        Run `squeue` for a set of SLURM job IDs.
+
+        Different scenarios:
+
+        1. When `squeue -j` succeeds (with exit code 0), return its stdout.
+        2. When `squeue -j` fails (typical example:
+           `squeue -j {invalid_job_id}` fails with exit code 1), re-raise.
+           The error will be handled upstream.
+        3. When the SSH command fails because another thread is keeping the
+           lock of the `FractalSSH` object for a long time, mock the standard
+           output of the `squeue` command so that it looks like jobs are not
+           completed yet.
+        4. When the SSH command fails for other reasons, despite a forgiving
+           setup (7 connection attempts with base waiting interval of 2
+           seconds, with a cumulative timeout of 126 seconds), return an empty
+           string. This will be treated upstream as an empty `squeu` output,
+           indirectly resulting in marking the job as completed.
+        """
+
+        if len(job_ids) == 0:
+            return ""
+
+        job_id_single_str = ",".join([str(j) for j in job_ids])
+        cmd = (
+            "squeue --noheader --format='%i %T' --states=all "
+            f"--jobs={job_id_single_str}"
+        )
+
+        try:
+            stdout = self.fractal_ssh.run_command(
+                cmd=cmd,
+                base_interval=base_interval,
+                max_attempts=max_attempts,
+            )
+            return stdout
+        except FractalSSHCommandError as e:
+            raise e
+        except FractalSSHTimeoutError:
+            logger.warning(
+                "[run_squeue] Could not acquire lock, use stdout placeholder."
+            )
+            FAKE_STATUS = "FRACTAL_STATUS_PLACEHOLDER"
+            placeholder_stdout = "\n".join(
+                [f"{job_id} {FAKE_STATUS}" for job_id in job_ids]
+            )
+            return placeholder_stdout
+        except Exception as e:
+            logger.error(f"Ignoring `squeue` command failure {e}")
+            return ""
fractal_server/app/runner/executors/slurm_sudo/runner.py
CHANGED
@@ -176,6 +176,18 @@ class SudoSlurmRunner(BaseSlurmRunner):
         )
         return res.stdout
 
-    def
+    def run_squeue(self, job_ids: list[str]) -> str:
+        """
+        Run `squeue` for a set of SLURM job IDs.
+        """
+
+        if len(job_ids) == 0:
+            return ""
+
+        job_id_single_str = ",".join([str(j) for j in job_ids])
+        cmd = (
+            "squeue --noheader --format='%i %T' --states=all "
+            f"--jobs {job_id_single_str}"
+        )
         res = _subprocess_run_or_raise(cmd)
         return res.stdout
fractal_server/app/runner/v2/runner_functions.py
CHANGED
@@ -229,6 +229,9 @@ def run_v2_task_non_parallel(
             exception=exception,
         )
     }
+    # NOTE: Here we don't have to handle the
+    # `outcome[0].exception is not None` branch, since for non_parallel
+    # tasks it was already handled within submit
    if outcome[0].invalid_output:
        with next(get_sync_db()) as db:
            update_status_of_history_unit(
@@ -356,6 +359,9 @@ def run_v2_task_parallel(
             result=results.get(ind, None),
             exception=exceptions.get(ind, None),
         )
+        # NOTE: Here we don't have to handle the
+        # `outcome[ind].exception is not None` branch, since for parallel
+        # tasks it was already handled within multisubmit
        if outcome[ind].invalid_output:
            with next(get_sync_db()) as db:
                update_status_of_history_unit(
@@ -576,7 +582,12 @@ def run_v2_task_compound(
             result=results.get(ind, None),
             exception=exceptions.get(ind, None),
         )
-
+        # NOTE: For compound task, `multisubmit` did not handle the
+        # `exception is not None` branch, therefore we have to include it here.
+        if (
+            compute_outcomes[ind].exception is not None
+            or compute_outcomes[ind].invalid_output
+        ):
             failure = True
 
     # NOTE: For compound tasks, we update `HistoryUnit.status` from here,
fractal_server/ssh/_fabric.py
CHANGED
@@ -23,6 +23,18 @@ class FractalSSHTimeoutError(RuntimeError):
     pass
 
 
+class FractalSSHConnectionError(RuntimeError):
+    pass
+
+
+class FractalSSHCommandError(RuntimeError):
+    pass
+
+
+class FractalSSHUnknownError(RuntimeError):
+    pass
+
+
 logger = set_logger(__name__)
 
 
@@ -170,7 +182,6 @@ class FractalSSH(object):
             label="read_remote_json_file",
             timeout=self.default_lock_timeout,
         ):
-
             try:
                 with self._sftp_unsafe().open(filepath, "r") as f:
                     data = json.load(f)
@@ -263,7 +274,7 @@
         cmd: str,
         allow_char: Optional[str] = None,
         max_attempts: Optional[int] = None,
-        base_interval: Optional[
+        base_interval: Optional[float] = None,
         lock_timeout: Optional[int] = None,
     ) -> str:
         """
@@ -311,7 +322,7 @@
                 t_1 = time.perf_counter()
                 self.logger.info(
                     f"{prefix} END running '{cmd}' over SSH, "
-                    f"elapsed {t_1-t_0:.3f}"
+                    f"elapsed {t_1 - t_0:.3f}"
                 )
                 self.logger.debug("STDOUT:")
                 self.logger.debug(res.stdout)
@@ -329,12 +340,16 @@
                     sleeptime = actual_base_interval**ind_attempt
                     self.logger.warning(
                         f"{prefix} Now sleep {sleeptime:.3f} "
-                        "seconds and
+                        "seconds and retry."
                     )
                     time.sleep(sleeptime)
                 else:
                     self.logger.error(f"{prefix} Reached last attempt")
-
+                    raise FractalSSHConnectionError(
+                        f"Reached last attempt "
+                        f"({max_attempts=}) for running "
+                        f"'{cmd}' over SSH"
+                    )
             except UnexpectedExit as e:
                 # Case 3: Command fails with an actual error
                 error_msg = (
@@ -342,18 +357,15 @@
                     f"Original error:\n{str(e)}."
                 )
                 self.logger.error(error_msg)
-                raise
+                raise FractalSSHCommandError(error_msg)
+            except FractalSSHTimeoutError as e:
+                raise e
             except Exception as e:
                 self.logger.error(
                     f"Running command `{cmd}` over SSH failed.\n"
                     f"Original Error:\n{str(e)}."
                 )
-                raise e
-
-        raise RuntimeError(
-            f"Reached last attempt ({max_attempts=}) for running "
-            f"'{cmd}' over SSH"
-        )
+                raise FractalSSHUnknownError(f"{type(e)}: {str(e)}")
 
     def send_file(
         self,
{fractal_server-2.14.0a32.dist-info → fractal_server-2.14.0a34.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-fractal_server/__init__.py,sha256=
+fractal_server/__init__.py,sha256=B5mHrNKBuCS1_dfqSKK7a3mM57rWv7Sf9ODhxz6f23g,26
 fractal_server/__main__.py,sha256=rkM8xjY1KeS3l63irB8yCrlVobR-73uDapC4wvrIlxI,6957
 fractal_server/alembic.ini,sha256=MWwi7GzjzawI9cCAK1LW7NxIBQDUqD12-ptJoq5JpP0,3153
 fractal_server/app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -73,21 +73,21 @@ fractal_server/app/runner/executors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQ
 fractal_server/app/runner/executors/base_runner.py,sha256=knWOERUwRLhsd9eq5GwGxH2ZVsvPOZRRjQPGbiExqcU,5052
 fractal_server/app/runner/executors/local/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fractal_server/app/runner/executors/local/get_local_config.py,sha256=KiakXxOahaLgWvQJ1LVGYGXht6DMGR9x8Xu-TuT9aY4,3628
-fractal_server/app/runner/executors/local/runner.py,sha256=
+fractal_server/app/runner/executors/local/runner.py,sha256=dPEpjIfJQu-st_tYiaI8VhH3y1uvK6DgfQ2cXU0vhOU,9543
 fractal_server/app/runner/executors/slurm_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fractal_server/app/runner/executors/slurm_common/_batching.py,sha256=ZY020JZlDS5mfpgpWTChQkyHU7iLE5kx2HVd57_C6XA,8850
 fractal_server/app/runner/executors/slurm_common/_job_states.py,sha256=nuV-Zba38kDrRESOVB3gaGbrSPZc4q7YGichQaeqTW0,238
 fractal_server/app/runner/executors/slurm_common/_slurm_config.py,sha256=_feRRnVVnvQa3AsOQqfULfOgaoj2o6Ze0-fwXwic8p4,15795
-fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py,sha256=
+fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py,sha256=S9BdLz7Enqx6hjH154LYas38b-t52mved0TUWCbMTyo,33118
 fractal_server/app/runner/executors/slurm_common/get_slurm_config.py,sha256=BW6fDpPyB0VH5leVxvwzkVH3r3hC7DuSyoWmRzHITWg,7305
 fractal_server/app/runner/executors/slurm_common/remote.py,sha256=EB2uASKjrBIr25oc13XvSwf8x-TpTBr9WuaLMwNr2y4,5850
 fractal_server/app/runner/executors/slurm_common/slurm_job_task_models.py,sha256=RoxHLKOn0_wGjnY0Sv0a9nDSiqxYZHKRoMkT3p9_G1E,3607
 fractal_server/app/runner/executors/slurm_common/utils_executors.py,sha256=naPyJI0I3lD-sYHbSXbMFGUBK4h_SggA5V91Z1Ch1Xg,1416
 fractal_server/app/runner/executors/slurm_ssh/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fractal_server/app/runner/executors/slurm_ssh/runner.py,sha256=
+fractal_server/app/runner/executors/slurm_ssh/runner.py,sha256=5ppdV5D1N6v3T2QUGBn1Q7dswcUKIpI6ZjX_yIO_Z9A,9439
 fractal_server/app/runner/executors/slurm_sudo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fractal_server/app/runner/executors/slurm_sudo/_subprocess_run_as_user.py,sha256=O1bNg1DiSDJmQE0RmOk2Ii47DagiXp5ryd0R6KxO2OM,3177
-fractal_server/app/runner/executors/slurm_sudo/runner.py,sha256=
+fractal_server/app/runner/executors/slurm_sudo/runner.py,sha256=lPWkRT499mChP3dNLrdDjMT-nw7-LWv6g58kdF_sMRw,6290
 fractal_server/app/runner/extract_archive.py,sha256=I7UGIHXXuFvlgVPsP7GMWPu2-DiS1EiyBs7a1bvgkxI,2458
 fractal_server/app/runner/filenames.py,sha256=lPnxKHtdRizr6FqG3zOdjDPyWA7GoaJGTtiuJV0gA8E,70
 fractal_server/app/runner/run_subprocess.py,sha256=c3JbYXq3hX2aaflQU19qJ5Xs6J6oXGNvnTEoAfv2bxc,959
@@ -102,7 +102,7 @@ fractal_server/app/runner/v2/db_tools.py,sha256=du5dKhMMFMErQXbGIgu9JvO_vtMensod
 fractal_server/app/runner/v2/deduplicate_list.py,sha256=IVTE4abBU1bUprFTkxrTfYKnvkNTanWQ-KWh_etiT08,645
 fractal_server/app/runner/v2/merge_outputs.py,sha256=D1L4Taieq9i71SPQyNc1kMokgHh-sV_MqF3bv7QMDBc,907
 fractal_server/app/runner/v2/runner.py,sha256=B4kAF1S-zHf2PbyHedfuiaNpu4oslVDp33KgXYcoXIk,15706
-fractal_server/app/runner/v2/runner_functions.py,sha256=
+fractal_server/app/runner/v2/runner_functions.py,sha256=2W6CFkezUsQ_k8YuC2oOEMtB_-7M9ensyhwCFvlS2No,19096
 fractal_server/app/runner/v2/runner_functions_low_level.py,sha256=_h_OOffq3d7V0uHa8Uvs0mj31y1GSZBUXjDDF3WjVjY,3620
 fractal_server/app/runner/v2/submit_workflow.py,sha256=QywUGIoHAHnrWgfnyX8W9kVqKY-RvVyNLpzrbsXZOZ4,13075
 fractal_server/app/runner/v2/task_interface.py,sha256=IXdQTI8rXFgXv1Ez0js4CjKFf3QwO2GCHRTuwiFtiTQ,2891
@@ -179,7 +179,7 @@ fractal_server/migrations/versions/f384e1c0cf5d_drop_task_default_args_columns.p
 fractal_server/migrations/versions/fbce16ff4e47_new_history_items.py,sha256=TDWCaIoM0Q4SpRWmR9zr_rdp3lJXhCfBPTMhtrP5xYE,3950
 fractal_server/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fractal_server/ssh/__init__.py,sha256=sVUmzxf7_DuXG1xoLQ1_00fo5NPhi2LJipSmU5EAkPs,124
-fractal_server/ssh/_fabric.py,sha256=
+fractal_server/ssh/_fabric.py,sha256=Do7wX1xsV3Pjmwqg-Z_X1_QM05RN5-sAowO_Hh7-9bk,23324
 fractal_server/string_tools.py,sha256=niViRrrZAOo0y6pEFI9L_eUYS1PoOiQZUBtngiLc2_k,1877
 fractal_server/syringe.py,sha256=3qSMW3YaMKKnLdgnooAINOPxnCOxP7y2jeAQYB21Gdo,2786
 fractal_server/tasks/__init__.py,sha256=kadmVUoIghl8s190_Tt-8f-WBqMi8u8oU4Pvw39NHE8,23
@@ -209,8 +209,8 @@ fractal_server/tasks/v2/utils_templates.py,sha256=Kc_nSzdlV6KIsO0CQSPs1w70zLyENP
 fractal_server/urls.py,sha256=QjIKAC1a46bCdiPMu3AlpgFbcv6a4l3ABcd5xz190Og,471
 fractal_server/utils.py,sha256=PMwrxWFxRTQRl1b9h-NRIbFGPKqpH_hXnkAT3NfZdpY,3571
 fractal_server/zip_tools.py,sha256=GjDgo_sf6V_DDg6wWeBlZu5zypIxycn_l257p_YVKGc,4876
-fractal_server-2.14.
-fractal_server-2.14.
-fractal_server-2.14.
-fractal_server-2.14.
-fractal_server-2.14.
+fractal_server-2.14.0a34.dist-info/LICENSE,sha256=QKAharUuhxL58kSoLizKJeZE3mTCBnX6ucmz8W0lxlk,1576
+fractal_server-2.14.0a34.dist-info/METADATA,sha256=YljYi9W71066fSXY2MIAuZQ_P1AqIhfyTECxk78i4og,4563
+fractal_server-2.14.0a34.dist-info/WHEEL,sha256=7dDg4QLnNKTvwIDR9Ac8jJaAmBC_owJrckbC0jjThyA,88
+fractal_server-2.14.0a34.dist-info/entry_points.txt,sha256=8tV2kynvFkjnhbtDnxAqImL6HMVKsopgGfew0DOp5UY,58
+fractal_server-2.14.0a34.dist-info/RECORD,,
{fractal_server-2.14.0a32.dist-info → fractal_server-2.14.0a34.dist-info}/LICENSE
File without changes
{fractal_server-2.14.0a32.dist-info → fractal_server-2.14.0a34.dist-info}/WHEEL
File without changes
{fractal_server-2.14.0a32.dist-info → fractal_server-2.14.0a34.dist-info}/entry_points.txt
File without changes