fractal-server 2.14.4a0__py3-none-any.whl → 2.14.6__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.
- fractal_server/__init__.py +1 -1
- fractal_server/app/db/__init__.py +2 -2
- fractal_server/app/models/security.py +8 -8
- fractal_server/app/models/user_settings.py +8 -10
- fractal_server/app/models/v2/accounting.py +2 -3
- fractal_server/app/models/v2/dataset.py +1 -2
- fractal_server/app/models/v2/history.py +3 -4
- fractal_server/app/models/v2/job.py +10 -11
- fractal_server/app/models/v2/project.py +1 -2
- fractal_server/app/models/v2/task.py +13 -14
- fractal_server/app/models/v2/task_group.py +15 -16
- fractal_server/app/models/v2/workflow.py +1 -2
- fractal_server/app/models/v2/workflowtask.py +6 -7
- fractal_server/app/routes/admin/v2/accounting.py +3 -4
- fractal_server/app/routes/admin/v2/job.py +13 -14
- fractal_server/app/routes/admin/v2/project.py +2 -4
- fractal_server/app/routes/admin/v2/task.py +11 -13
- fractal_server/app/routes/admin/v2/task_group.py +15 -17
- fractal_server/app/routes/admin/v2/task_group_lifecycle.py +5 -8
- fractal_server/app/routes/api/v2/__init__.py +2 -0
- fractal_server/app/routes/api/v2/_aux_functions.py +7 -9
- fractal_server/app/routes/api/v2/_aux_functions_history.py +1 -1
- fractal_server/app/routes/api/v2/_aux_functions_task_lifecycle.py +1 -3
- fractal_server/app/routes/api/v2/_aux_functions_tasks.py +5 -6
- fractal_server/app/routes/api/v2/dataset.py +6 -8
- fractal_server/app/routes/api/v2/history.py +5 -8
- fractal_server/app/routes/api/v2/images.py +2 -3
- fractal_server/app/routes/api/v2/job.py +5 -6
- fractal_server/app/routes/api/v2/pre_submission_checks.py +1 -3
- fractal_server/app/routes/api/v2/project.py +2 -4
- fractal_server/app/routes/api/v2/status_legacy.py +2 -4
- fractal_server/app/routes/api/v2/submit.py +3 -4
- fractal_server/app/routes/api/v2/task.py +6 -7
- fractal_server/app/routes/api/v2/task_collection.py +11 -13
- fractal_server/app/routes/api/v2/task_collection_custom.py +4 -4
- fractal_server/app/routes/api/v2/task_group.py +6 -8
- fractal_server/app/routes/api/v2/task_group_lifecycle.py +6 -9
- fractal_server/app/routes/api/v2/task_version_update.py +270 -0
- fractal_server/app/routes/api/v2/workflow.py +5 -6
- fractal_server/app/routes/api/v2/workflow_import.py +3 -5
- fractal_server/app/routes/api/v2/workflowtask.py +2 -114
- fractal_server/app/routes/auth/current_user.py +2 -2
- fractal_server/app/routes/pagination.py +2 -3
- fractal_server/app/runner/exceptions.py +16 -22
- fractal_server/app/runner/executors/base_runner.py +19 -7
- fractal_server/app/runner/executors/call_command_wrapper.py +52 -0
- fractal_server/app/runner/executors/local/get_local_config.py +2 -3
- fractal_server/app/runner/executors/local/runner.py +52 -13
- fractal_server/app/runner/executors/slurm_common/_batching.py +2 -3
- fractal_server/app/runner/executors/slurm_common/_slurm_config.py +27 -29
- fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py +95 -63
- fractal_server/app/runner/executors/slurm_common/get_slurm_config.py +2 -3
- fractal_server/app/runner/executors/slurm_common/remote.py +47 -92
- fractal_server/app/runner/executors/slurm_common/slurm_job_task_models.py +22 -22
- fractal_server/app/runner/executors/slurm_ssh/run_subprocess.py +2 -3
- fractal_server/app/runner/executors/slurm_ssh/runner.py +4 -6
- fractal_server/app/runner/executors/slurm_sudo/_subprocess_run_as_user.py +2 -6
- fractal_server/app/runner/executors/slurm_sudo/runner.py +9 -18
- fractal_server/app/runner/set_start_and_last_task_index.py +2 -5
- fractal_server/app/runner/shutdown.py +5 -11
- fractal_server/app/runner/task_files.py +3 -13
- fractal_server/app/runner/v2/_local.py +3 -4
- fractal_server/app/runner/v2/_slurm_ssh.py +5 -7
- fractal_server/app/runner/v2/_slurm_sudo.py +8 -10
- fractal_server/app/runner/v2/runner.py +4 -5
- fractal_server/app/runner/v2/runner_functions.py +20 -35
- fractal_server/app/runner/v2/submit_workflow.py +7 -10
- fractal_server/app/runner/v2/task_interface.py +2 -3
- fractal_server/app/runner/versions.py +3 -13
- fractal_server/app/schemas/user.py +2 -4
- fractal_server/app/schemas/user_group.py +1 -2
- fractal_server/app/schemas/user_settings.py +19 -21
- fractal_server/app/schemas/v2/dataset.py +2 -3
- fractal_server/app/schemas/v2/dumps.py +13 -15
- fractal_server/app/schemas/v2/history.py +6 -7
- fractal_server/app/schemas/v2/job.py +17 -18
- fractal_server/app/schemas/v2/manifest.py +12 -13
- fractal_server/app/schemas/v2/status_legacy.py +2 -2
- fractal_server/app/schemas/v2/task.py +29 -30
- fractal_server/app/schemas/v2/task_collection.py +8 -9
- fractal_server/app/schemas/v2/task_group.py +22 -23
- fractal_server/app/schemas/v2/workflow.py +1 -2
- fractal_server/app/schemas/v2/workflowtask.py +27 -29
- fractal_server/app/security/__init__.py +10 -12
- fractal_server/config.py +32 -42
- fractal_server/images/models.py +2 -4
- fractal_server/images/tools.py +4 -7
- fractal_server/logger.py +3 -5
- fractal_server/ssh/_fabric.py +41 -13
- fractal_server/string_tools.py +2 -2
- fractal_server/syringe.py +1 -1
- fractal_server/tasks/v2/local/collect.py +2 -3
- fractal_server/tasks/v2/local/deactivate.py +1 -1
- fractal_server/tasks/v2/local/reactivate.py +1 -1
- fractal_server/tasks/v2/ssh/collect.py +256 -245
- fractal_server/tasks/v2/ssh/deactivate.py +210 -187
- fractal_server/tasks/v2/ssh/reactivate.py +154 -146
- fractal_server/tasks/v2/utils_background.py +2 -3
- fractal_server/types/__init__.py +1 -2
- fractal_server/types/validators/_filter_validators.py +1 -2
- fractal_server/utils.py +4 -5
- fractal_server/zip_tools.py +1 -1
- {fractal_server-2.14.4a0.dist-info → fractal_server-2.14.6.dist-info}/METADATA +2 -9
- {fractal_server-2.14.4a0.dist-info → fractal_server-2.14.6.dist-info}/RECORD +107 -108
- fractal_server/app/history/__init__.py +0 -0
- fractal_server/app/runner/executors/slurm_common/utils_executors.py +0 -58
- fractal_server/app/runner/v2/runner_functions_low_level.py +0 -122
- {fractal_server-2.14.4a0.dist-info → fractal_server-2.14.6.dist-info}/LICENSE +0 -0
- {fractal_server-2.14.4a0.dist-info → fractal_server-2.14.6.dist-info}/WHEEL +0 -0
- {fractal_server-2.14.4a0.dist-info → fractal_server-2.14.6.dist-info}/entry_points.txt +0 -0
fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py

@@ -5,9 +5,9 @@ import time
 from pathlib import Path
 from typing import Any
 from typing import Literal
-from typing import Optional

-import cloudpickle
+from pydantic import BaseModel
+from pydantic import ConfigDict

 from ..slurm_common._slurm_config import SlurmConfig
 from ..slurm_common.slurm_job_task_models import SlurmJob
@@ -36,6 +36,17 @@ SHUTDOWN_EXCEPTION = JobExecutionError(SHUTDOWN_ERROR_MESSAGE)
 logger = set_logger(__name__)


+class RemoteInputData(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    python_version: tuple[int, int, int]
+    fractal_server_version: str
+    full_command: str
+
+    metadiff_file_remote: str
+    log_file_remote: str
+
+
 def create_accounting_record_slurm(
     *,
     user_id: int,
@@ -69,9 +80,9 @@ class BaseSlurmRunner(BaseRunner):
         root_dir_remote: Path,
         slurm_runner_type: Literal["ssh", "sudo"],
         python_worker_interpreter: str,
-        common_script_lines: Optional[list[str]] = None,
-        user_cache_dir: Optional[str] = None,
-        poll_interval: Optional[int] = None,
+        common_script_lines: list[str] | None = None,
+        user_cache_dir: str | None = None,
+        poll_interval: int | None = None,
     ):
         self.slurm_runner_type = slurm_runner_type
         self.root_dir_local = root_dir_local
@@ -121,7 +132,6 @@ class BaseSlurmRunner(BaseRunner):
         raise NotImplementedError("Implement in child class.")

     def _get_finished_jobs(self, job_ids: list[str]) -> set[str]:
-
         # If there is no Slurm job to check, return right away
         if not job_ids:
             return set()
@@ -168,59 +178,73 @@ class BaseSlurmRunner(BaseRunner):

     def _submit_single_sbatch(
         self,
-
+        *,
+        base_command: str,
         slurm_job: SlurmJob,
         slurm_config: SlurmConfig,
     ) -> str:
         logger.debug("[_submit_single_sbatch] START")
-
-        versions = dict(
-            python=sys.version_info[:3],
-            cloudpickle=cloudpickle.__version__,
-            fractal_server=__VERSION__,
-        )
+
         for task in slurm_job.tasks:
-            # Write input
-
-
-
-
+            # Write input file
+            if self.slurm_runner_type == "ssh":
+                args_file_remote = task.task_files.args_file_remote
+            else:
+                args_file_remote = task.task_files.args_file_local
+            metadiff_file_remote = task.task_files.metadiff_file_remote
+            full_command = (
+                f"{base_command} "
+                f"--args-json {args_file_remote} "
+                f"--out-json {metadiff_file_remote}"
             )
-
-
-
+
+            input_data = RemoteInputData(
+                full_command=full_command,
+                python_version=sys.version_info[:3],
+                fractal_server_version=__VERSION__,
+                metadiff_file_remote=task.task_files.metadiff_file_remote,
+                log_file_remote=task.task_files.log_file_remote,
+            )
+
+            with open(task.input_file_local, "w") as f:
+                json.dump(input_data.model_dump(), f, indent=2)
+
+            with open(task.task_files.args_file_local, "w") as f:
+                json.dump(task.parameters, f, indent=2)
+
             logger.debug(
-                "[_submit_single_sbatch] Written "
-                f"{task.input_pickle_file_local=}"
+                "[_submit_single_sbatch] Written " f"{task.input_file_local=}"
             )

             if self.slurm_runner_type == "ssh":
-                # Send input
+                # Send input file (only relevant for SSH)
+                self.fractal_ssh.send_file(
+                    local=task.input_file_local,
+                    remote=task.input_file_remote,
+                )
                 self.fractal_ssh.send_file(
-                    local=task.
-                    remote=task.
+                    local=task.task_files.args_file_local,
+                    remote=task.task_files.args_file_remote,
                 )
                 logger.debug(
                     "[_submit_single_sbatch] Transferred "
-                    f"{task.
+                    f"{task.input_file_local=}"
                 )

         # Prepare commands to be included in SLURM submission script
         cmdlines = []
         for task in slurm_job.tasks:
             if self.slurm_runner_type == "ssh":
-
+                input_file = task.input_file_remote
             else:
-
-
+                input_file = task.input_file_local
+            output_file = task.output_file_remote
             cmdlines.append(
-
-
-
-
-
-                f"--output-file {output_pickle_file}"
-            )
+                f"{self.python_worker_interpreter}"
+                " -m fractal_server.app.runner."
+                "executors.slurm_common.remote "
+                f"--input-file {input_file} "
+                f"--output-file {output_file}"
             )

         # Set ntasks
@@ -363,12 +387,12 @@ class BaseSlurmRunner(BaseRunner):
         was_job_scancelled: bool = False,
     ) -> tuple[Any, Exception]:
         try:
-            with open(task.
-
-            success
+            with open(task.output_file_local) as f:
+                output = json.load(f)
+            success = output[0]
             if success:
                 # Task succeeded
-                result = output
+                result = output[1]
                 return (result, None)
             else:
                 # Task failed in a controlled way, and produced an `output`
@@ -376,21 +400,18 @@ class BaseSlurmRunner(BaseRunner):
                 # `exc_type_name` and `traceback_string` and with optional
                 # keys `workflow_task_order`, `workflow_task_id` and
                 # `task_name`.
-
+                exc_proxy = output[1]
+                exc_type_name = exc_proxy.get("exc_type_name")
                 logger.debug(
-                    f"Output
+                    f"Output file contains a '{exc_type_name}' exception."
+                )
+                traceback_string = output[1].get("traceback_string")
+                exception = TaskExecutionError(
+                    traceback_string,
+                    workflow_task_id=task.workflow_task_id,
+                    workflow_task_order=task.workflow_task_order,
+                    task_name=task.task_name,
                 )
-                traceback_string = output.get("traceback_string")
-                kwargs = {
-                    key: output[key]
-                    for key in [
-                        "workflow_task_order",
-                        "workflow_task_id",
-                        "task_name",
-                    ]
-                    if key in output.keys()
-                }
-                exception = TaskExecutionError(traceback_string, **kwargs)
                 return (None, exception)

         except Exception as e:
@@ -405,8 +426,8 @@ class BaseSlurmRunner(BaseRunner):
                 exception = SHUTDOWN_EXCEPTION
                 return (None, exception)
         finally:
-            Path(task.
-            Path(task.
+            Path(task.input_file_local).unlink(missing_ok=True)
+            Path(task.output_file_local).unlink(missing_ok=True)

     def is_shutdown(self) -> bool:
         return self.shutdown_file.exists()
@@ -451,7 +472,10 @@ class BaseSlurmRunner(BaseRunner):

     def submit(
         self,
-
+        base_command: str,
+        workflow_task_order: int,
+        workflow_task_id: int,
+        task_name: str,
         parameters: dict[str, Any],
         history_unit_id: int,
         task_files: TaskFiles,
@@ -507,13 +531,16 @@ class BaseSlurmRunner(BaseRunner):
                     workdir_remote=workdir_remote,
                     workdir_local=workdir_local,
                     task_files=task_files,
+                    workflow_task_order=workflow_task_order,
+                    workflow_task_id=workflow_task_id,
+                    task_name=task_name,
                 )
             ],
         )

         config.parallel_tasks_per_job = 1
         self._submit_single_sbatch(
-
+            base_command=base_command,
             slurm_job=slurm_job,
             slurm_config=config,
         )
@@ -586,7 +613,10 @@ class BaseSlurmRunner(BaseRunner):

     def multisubmit(
         self,
-
+        base_command: str,
+        workflow_task_order: int,
+        workflow_task_id: int,
+        task_name: str,
         list_parameters: list[dict],
         history_unit_ids: list[int],
         list_task_files: list[TaskFiles],
@@ -602,7 +632,6 @@ class BaseSlurmRunner(BaseRunner):

         logger.debug(f"[multisubmit] START, {len(list_parameters)=}")
         try:
-
             if self.is_shutdown():
                 if task_type == "parallel":
                     with next(get_sync_db()) as db:
@@ -672,6 +701,9 @@ class BaseSlurmRunner(BaseRunner):
                         parameters=parameters,
                         zarr_url=parameters["zarr_url"],
                        task_files=list_task_files[index],
+                        workflow_task_order=workflow_task_order,
+                        workflow_task_id=workflow_task_id,
+                        task_name=task_name,
                    ),
                )
            jobs_to_submit.append(
@@ -687,7 +719,7 @@ class BaseSlurmRunner(BaseRunner):
             logger.debug("[multisubmit] Transfer files and submit jobs.")
             for slurm_job in jobs_to_submit:
                 self._submit_single_sbatch(
-
+                    base_command=base_command,
                     slurm_job=slurm_job,
                     slurm_config=config,
                 )
@@ -850,8 +882,8 @@ class BaseSlurmRunner(BaseRunner):
         """
         Check that a list of `SlurmJob`s have homogeneous working folders.
         """
-        set_workdir_local =
-        set_workdir_remote =
+        set_workdir_local = {_job.workdir_local for _job in slurm_jobs}
+        set_workdir_remote = {_job.workdir_remote for _job in slurm_jobs}
         if len(set_workdir_local) > 1:
             raise ValueError(f"Non-unique values in {set_workdir_local=}.")
         if len(set_workdir_remote) > 1:
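
Note on the hunks above: the cloudpickle payload is replaced by a plain JSON input file described by the new `RemoteInputData` model. A minimal sketch of what `_submit_single_sbatch` now writes for each task; the field names and the `--args-json`/`--out-json` flags come from the diff, while all paths, the command string, and file names below are hypothetical, for illustration only:

    import json
    import sys

    from pydantic import BaseModel, ConfigDict


    class RemoteInputData(BaseModel):
        # Same fields as in the diff above
        model_config = ConfigDict(extra="forbid")
        python_version: tuple[int, int, int]
        fractal_server_version: str
        full_command: str
        metadiff_file_remote: str
        log_file_remote: str


    # Hypothetical values, for illustration only
    input_data = RemoteInputData(
        full_command=(
            "/some/task-venv/bin/python /some/task.py "
            "--args-json /job/dir/0-000000-input-args.json "
            "--out-json /job/dir/0-000000-metadiff.json"
        ),
        python_version=sys.version_info[:3],
        fractal_server_version="2.14.6",
        metadiff_file_remote="/job/dir/0-000000-metadiff.json",
        log_file_remote="/job/dir/0-000000-log.txt",
    )

    # The runner writes this JSON file locally and, for SSH, transfers it
    with open("0-000000-input.json", "w") as f:
        json.dump(input_data.model_dump(), f, indent=2)

The matching `--input-file`/`--output-file` paths are then passed to `python -m fractal_server.app.runner.executors.slurm_common.remote` in the submission script, as shown in the `cmdlines` block above.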

fractal_server/app/runner/executors/slurm_common/get_slurm_config.py

@@ -1,6 +1,5 @@
 from pathlib import Path
 from typing import Literal
-from typing import Optional

 from ._batching import heuristics
 from ._slurm_config import _parse_mem_value
@@ -14,7 +13,7 @@ from fractal_server.app.models.v2 import WorkflowTaskV2
 def get_slurm_config_internal(
     wftask: WorkflowTaskV2,
     which_type: Literal["non_parallel", "parallel"],
-    config_path: Optional[Path] = None,
+    config_path: Path | None = None,
 ) -> SlurmConfig:
     """
     Prepare a `SlurmConfig` configuration object
@@ -168,7 +167,7 @@ def get_slurm_config_internal(
 def get_slurm_config(
     wftask: WorkflowTaskV2,
     which_type: Literal["non_parallel", "parallel"],
-    config_path: Optional[Path] = None,
+    config_path: Path | None = None,
     tot_tasks: int = 1,
 ) -> SlurmConfig:
     config = get_slurm_config_internal(
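
A recurring pattern in this release (here and in most of the files listed above) is replacing `Optional[X]` annotations with the equivalent `X | None` union syntax (PEP 604, available since Python 3.10), which allows dropping the `from typing import Optional` imports. A minimal illustration; the function names are hypothetical:

    from pathlib import Path
    from typing import Optional


    # Old spelling
    def get_config_old(config_path: Optional[Path] = None) -> None:
        ...


    # New spelling (PEP 604); same runtime behavior on Python >= 3.10
    def get_config_new(config_path: Path | None = None) -> None:
        ...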

fractal_server/app/runner/executors/slurm_common/remote.py

@@ -1,27 +1,10 @@
-# This adapts clusterfutures <https://github.com/sampsyo/clusterfutures>
-# Original Copyright
-# Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
-# License: MIT
-#
-# Modified by:
-# Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
-# Tommaso Comparin <tommaso.comparin@exact-lab.it>
-#
-# Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
-# University of Zurich
-"""
-This module provides a simple self-standing script that executes arbitrary
-python code received via pickled files on a cluster node.
-"""
 import argparse
+import json
 import logging
 import os
 import sys
-from typing import Literal
-from typing import Union
-
-import cloudpickle

+from ..call_command_wrapper import call_command_wrapper
 from fractal_server import __VERSION__


@@ -33,59 +16,6 @@ class FractalVersionMismatch(RuntimeError):
     pass


-def _check_versions_mismatch(
-    server_versions: dict[
-        Literal["python", "fractal_server", "cloudpickle"],
-        Union[str, tuple[int]],
-    ]
-):
-    """
-    Compare the server {python,cloudpickle,fractal_server} versions with the
-    ones available to the current worker
-
-    Arguments:
-        server_versions:
-            The version used in the fractal-server instance that created the
-            cloudpickle file
-
-    Raises:
-        FractalVersionMismatch: If the cloudpickle or fractal_server versions
-            do not match with the ones on the server
-    """
-
-    server_python_version = list(server_versions["python"])
-    worker_python_version = list(sys.version_info[:3])
-    if worker_python_version != server_python_version:
-        if worker_python_version[:2] != server_python_version[:2]:
-            # FIXME: Turn this into an error, in some version post 2.14.
-            logging.error(
-                f"{server_python_version=} but {worker_python_version=}. "
-                "This configuration will be deprecated in a future version, "
-                "please contact the admin of this Fractal instance."
-            )
-        else:
-            # Major.minor versions match, patch versions differ
-            logging.warning(
-                f"{server_python_version=} but {worker_python_version=}."
-            )
-
-    server_cloudpickle_version = server_versions["cloudpickle"]
-    worker_cloudpickle_version = cloudpickle.__version__
-    if worker_cloudpickle_version != server_cloudpickle_version:
-        raise FractalVersionMismatch(
-            f"{server_cloudpickle_version=} but "
-            f"{worker_cloudpickle_version=}"
-        )
-
-    server_fractal_server_version = server_versions["fractal_server"]
-    worker_fractal_server_version = __VERSION__
-    if worker_fractal_server_version != server_fractal_server_version:
-        raise FractalVersionMismatch(
-            f"{server_fractal_server_version=} but "
-            f"{worker_fractal_server_version=}"
-        )
-
-
 def worker(
     *,
     in_fname: str,
@@ -95,8 +25,8 @@ def worker(
     Execute a job, possibly on a remote node.

     Arguments:
-        in_fname: Absolute path to the input
-        out_fname: Absolute path of the output
+        in_fname: Absolute path to the input file (must be readable).
+        out_fname: Absolute path of the output file (must be writeable).
     """

     # Create output folder, if missing
@@ -107,19 +37,49 @@ def worker(

     # Execute the job and capture exceptions
     try:
-        with open(in_fname
-
-
-
+        with open(in_fname) as f:
+            input_data = json.load(f)
+
+        server_python_version = input_data["python_version"]
+        server_fractal_server_version = input_data["fractal_server_version"]
+
+        # Fractal-server version must be identical
+        worker_fractal_server_version = __VERSION__
+        if worker_fractal_server_version != server_fractal_server_version:
+            raise FractalVersionMismatch(
+                f"{server_fractal_server_version=} but "
+                f"{worker_fractal_server_version=}"
+            )
+
+        # Python version mismatch only raises a warning
+        worker_python_version = tuple(sys.version_info[:3])
+        if worker_python_version != server_python_version:
+            if worker_python_version[:2] != server_python_version[:2]:
+                logging.warning(
+                    f"{server_python_version=} but {worker_python_version=}."
+                )
+
+        # Extract some useful paths
+        metadiff_file_remote = input_data["metadiff_file_remote"]
+        log_path = input_data["log_file_remote"]
+
+        # Execute command
+        full_command = input_data["full_command"]
+        call_command_wrapper(cmd=full_command, log_path=log_path)
+
+        try:
+            with open(metadiff_file_remote) as f:
+                out_meta = json.load(f)
+            result = (True, out_meta)
+        except FileNotFoundError:
+            # Command completed, but it produced no metadiff file
+            result = (True, None)

-        result = (True, fun(*args, **kwargs))
-        out = cloudpickle.dumps(result)
     except Exception as e:
         # Exception objects are not serialisable. Here we save the relevant
         # exception contents in a serializable dictionary. Note that whenever
         # the task failed "properly", the exception is a `TaskExecutionError`
         # and it has additional attributes.
-
         import traceback

         exc_type, exc_value, traceback_obj = sys.exc_info()
@@ -131,33 +91,28 @@ def worker(
         )
         traceback_string = "".join(traceback_list)
         exc_proxy = dict(
-            exc_type_name=
+            exc_type_name=type(e).__name__,
             traceback_string=traceback_string,
-            workflow_task_order=getattr(e, "workflow_task_order", None),
-            workflow_task_id=getattr(e, "workflow_task_id", None),
-            task_name=getattr(e, "task_name", None),
         )
         result = (False, exc_proxy)
-        out = cloudpickle.dumps(result)

-    # Write
-    with open(out_fname, "
-
+    # Write output file
+    with open(out_fname, "w") as f:
+        json.dump(result, f, indent=2)


 if __name__ == "__main__":
-
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "--input-file",
         type=str,
-        help="Path of input
+        help="Path of input JSON file",
         required=True,
     )
     parser.add_argument(
         "--output-file",
         type=str,
-        help="Path of output
+        help="Path of output JSON file",
         required=True,
     )
     parsed_args = parser.parse_args()
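
With the rewrite above, `remote.py` no longer unpickles and calls a Python function: it reads the JSON input file, runs `full_command` through `call_command_wrapper`, and serializes a `(success, payload)` pair to the output file (a Python tuple is written as a JSON array). A sketch of the two possible output shapes, with hypothetical payload values; only the tuple structure and the `exc_type_name`/`traceback_string` keys come from the code above:

    import json

    # Success: payload is the parsed metadiff file, or None if the task
    # produced no metadiff file.
    success_result = (True, {"some_key": "some_value"})

    # Failure: payload is a JSON-serializable proxy of the exception.
    failure_result = (
        False,
        {
            "exc_type_name": "ValueError",
            "traceback_string": "Traceback (most recent call last):\n  ...",
        },
    )

    with open("0-000000-output.json", "w") as f:
        json.dump(success_result, f, indent=2)  # written as [true, {...}]

The server-side runner then reads `output[0]` and `output[1]` back from this file, as shown in the base_slurm_runner.py hunks above.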

fractal_server/app/runner/executors/slurm_common/slurm_job_task_models.py

@@ -1,6 +1,5 @@
 from pathlib import Path
 from typing import Any
-from typing import Optional

 from pydantic import BaseModel
 from pydantic import ConfigDict
@@ -15,56 +14,57 @@ class SlurmTask(BaseModel):
     workdir_local: Path
     workdir_remote: Path
     parameters: dict[str, Any]
-    zarr_url: Optional[str] = None
+    zarr_url: str | None = None
     task_files: TaskFiles
     index: int

+    workflow_task_order: int
+    workflow_task_id: int
+    task_name: str
+
     @property
-    def
+    def input_file_local_path(self) -> Path:
         return (
-            self.workdir_local / f"{self.prefix}-{self.component}-input.
+            self.workdir_local / f"{self.prefix}-{self.component}-input.json"
         )

     @property
-    def
+    def input_file_remote_path(self) -> Path:
         return (
-            self.workdir_remote
-            / f"{self.prefix}-{self.component}-input.pickle"
+            self.workdir_remote / f"{self.prefix}-{self.component}-input.json"
         )

     @property
-    def
+    def output_file_local_path(self) -> Path:
         return (
-            self.workdir_local
-            / f"{self.prefix}-{self.component}-output.pickle"
+            self.workdir_local / f"{self.prefix}-{self.component}-output.json"
         )

     @property
-    def
+    def output_file_remote_path(self) -> Path:
         return (
-            self.workdir_remote
-            / f"{self.prefix}-{self.component}-output.pickle"
+            self.workdir_remote / f"{self.prefix}-{self.component}-output.json"
         )

     @property
-    def
-        return self.
+    def input_file_local(self) -> str:
+        return self.input_file_local_path.as_posix()

     @property
-    def
-        return self.
+    def input_file_remote(self) -> str:
+        return self.input_file_remote_path.as_posix()

     @property
-    def
-        return self.
+    def output_file_local(self) -> str:
+        return self.output_file_local_path.as_posix()

     @property
-    def
-        return self.
+    def output_file_remote(self) -> str:
+        return self.output_file_remote_path.as_posix()


 class SlurmJob(BaseModel):
-    slurm_job_id: Optional[str] = None
+    slurm_job_id: str | None = None
     prefix: str
     workdir_local: Path
     workdir_remote: Path
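
The `SlurmTask` properties above switch the per-task payload files from `*.pickle` to `*.json`, keeping the `{prefix}-{component}-...` naming scheme. A small sketch of the resulting paths, with hypothetical prefix, component, and working directories (only the naming pattern comes from the diff):

    from pathlib import Path

    # Hypothetical values, for illustration only
    prefix = "1-par"
    component = "000000"
    workdir_local = Path("/srv/fractal/artifacts/job_42/1_task")
    workdir_remote = Path("/home/user/fractal_cache/job_42/1_task")

    # Same naming scheme as input_file_local_path / output_file_remote_path
    input_file_local = (
        workdir_local / f"{prefix}-{component}-input.json"
    ).as_posix()
    output_file_remote = (
        workdir_remote / f"{prefix}-{component}-output.json"
    ).as_posix()

    print(input_file_local)    # .../job_42/1_task/1-par-000000-input.json
    print(output_file_remote)  # .../job_42/1_task/1-par-000000-output.json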

fractal_server/app/runner/executors/slurm_ssh/run_subprocess.py

@@ -1,6 +1,5 @@
 import shlex
 import subprocess  # nosec
-from typing import Optional

 from fractal_server.logger import get_logger
 from fractal_server.string_tools import validate_cmd
@@ -8,8 +7,8 @@ from fractal_server.string_tools import validate_cmd

 def run_subprocess(
     cmd: str,
-    allow_char: Optional[str] = None,
-    logger_name: Optional[str] = None,
+    allow_char: str | None = None,
+    logger_name: str | None = None,
 ) -> subprocess.CompletedProcess:
     validate_cmd(cmd, allow_char=allow_char)
     logger = get_logger(logger_name)

fractal_server/app/runner/executors/slurm_ssh/runner.py

@@ -1,6 +1,5 @@
 import time
 from pathlib import Path
-from typing import Optional

 from ..slurm_common.base_slurm_runner import BaseSlurmRunner
 from ..slurm_common.slurm_job_task_models import SlurmJob
@@ -27,9 +26,9 @@ class SlurmSSHRunner(BaseSlurmRunner):
         # Common
         root_dir_local: Path,
         root_dir_remote: Path,
-        common_script_lines: Optional[list[str]] = None,
-        user_cache_dir: Optional[str] = None,
-        poll_interval: Optional[int] = None,
+        common_script_lines: list[str] | None = None,
+        user_cache_dir: str | None = None,
+        poll_interval: int | None = None,
         # Specific
         fractal_ssh: FractalSSH,
     ) -> None:
@@ -99,9 +98,8 @@ class SlurmSSHRunner(BaseSlurmRunner):
             for task in _slurm_job.tasks:
                 _single_job_filelist.extend(
                     [
-                        task.
+                        task.output_file_remote_path.name,
                         task.task_files.log_file_remote_path.name,
-                        task.task_files.args_file_remote_path.name,
                         task.task_files.metadiff_file_remote_path.name,
                     ]
                 )

fractal_server/app/runner/executors/slurm_sudo/_subprocess_run_as_user.py

@@ -17,7 +17,6 @@ another user. Note that this requires appropriate sudo permissions.
 """
 import shlex
 import subprocess  # nosec
-from typing import Optional

 from fractal_server.logger import set_logger
 from fractal_server.string_tools import validate_cmd
@@ -28,8 +27,7 @@ logger = set_logger(__name__)
 def _run_command_as_user(
     *,
     cmd: str,
-    user: Optional[str] = None,
-    encoding: Optional[str] = "utf-8",
+    user: str | None = None,
     check: bool = False,
 ) -> subprocess.CompletedProcess:
     """
@@ -38,8 +36,6 @@ def _run_command_as_user(
     Arguments:
         cmd: Command to be run
         user: User to be impersonated
-        encoding: Argument for `subprocess.run`. Note that this must be `None`
-            to have stdout/stderr as bytes.
         check: If `True`, check that `returncode=0` and fail otherwise.

     Raises:
@@ -57,7 +53,7 @@ def _run_command_as_user(
     res = subprocess.run(  # nosec
         shlex.split(new_cmd),
         capture_output=True,
-        encoding=
+        encoding="utf-8",
     )
     logger.debug(f"[_run_command_as_user] {res.returncode=}")
     logger.debug(f"[_run_command_as_user] {res.stdout=}")
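
Finally, note that dropping the `encoding` parameter from `_run_command_as_user` means `subprocess.run` is now always called with `encoding="utf-8"`, so `stdout`/`stderr` are decoded `str` objects rather than `bytes`. A self-contained illustration of that difference (the command itself is arbitrary):

    import shlex
    import subprocess

    res_text = subprocess.run(
        shlex.split("echo hello"),
        capture_output=True,
        encoding="utf-8",
    )
    res_bytes = subprocess.run(
        shlex.split("echo hello"),
        capture_output=True,
    )

    print(type(res_text.stdout))   # <class 'str'>
    print(type(res_bytes.stdout))  # <class 'bytes'>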