fractal-server 1.4.9__py3-none-any.whl → 2.0.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/models/__init__.py +4 -7
- fractal_server/app/models/linkuserproject.py +9 -0
- fractal_server/app/models/security.py +6 -0
- fractal_server/app/models/state.py +1 -1
- fractal_server/app/models/v1/__init__.py +10 -0
- fractal_server/app/models/{dataset.py → v1/dataset.py} +5 -5
- fractal_server/app/models/{job.py → v1/job.py} +5 -5
- fractal_server/app/models/{project.py → v1/project.py} +5 -5
- fractal_server/app/models/{task.py → v1/task.py} +7 -2
- fractal_server/app/models/{workflow.py → v1/workflow.py} +5 -5
- fractal_server/app/models/v2/__init__.py +20 -0
- fractal_server/app/models/v2/dataset.py +55 -0
- fractal_server/app/models/v2/job.py +51 -0
- fractal_server/app/models/v2/project.py +31 -0
- fractal_server/app/models/v2/task.py +93 -0
- fractal_server/app/models/v2/workflow.py +43 -0
- fractal_server/app/models/v2/workflowtask.py +90 -0
- fractal_server/app/routes/{admin.py → admin/v1.py} +42 -42
- fractal_server/app/routes/admin/v2.py +275 -0
- fractal_server/app/routes/api/v1/__init__.py +7 -7
- fractal_server/app/routes/api/v1/_aux_functions.py +2 -2
- fractal_server/app/routes/api/v1/dataset.py +44 -37
- fractal_server/app/routes/api/v1/job.py +12 -12
- fractal_server/app/routes/api/v1/project.py +23 -21
- fractal_server/app/routes/api/v1/task.py +24 -14
- fractal_server/app/routes/api/v1/task_collection.py +16 -14
- fractal_server/app/routes/api/v1/workflow.py +24 -24
- fractal_server/app/routes/api/v1/workflowtask.py +10 -10
- fractal_server/app/routes/api/v2/__init__.py +28 -0
- fractal_server/app/routes/api/v2/_aux_functions.py +497 -0
- fractal_server/app/routes/api/v2/apply.py +220 -0
- fractal_server/app/routes/api/v2/dataset.py +310 -0
- fractal_server/app/routes/api/v2/images.py +212 -0
- fractal_server/app/routes/api/v2/job.py +200 -0
- fractal_server/app/routes/api/v2/project.py +205 -0
- fractal_server/app/routes/api/v2/task.py +222 -0
- fractal_server/app/routes/api/v2/task_collection.py +229 -0
- fractal_server/app/routes/api/v2/workflow.py +398 -0
- fractal_server/app/routes/api/v2/workflowtask.py +269 -0
- fractal_server/app/routes/aux/_job.py +1 -1
- fractal_server/app/runner/async_wrap.py +27 -0
- fractal_server/app/runner/exceptions.py +129 -0
- fractal_server/app/runner/executors/local/__init__.py +3 -0
- fractal_server/app/runner/{_local → executors/local}/executor.py +2 -2
- fractal_server/app/runner/executors/slurm/__init__.py +3 -0
- fractal_server/app/runner/{_slurm → executors/slurm}/_batching.py +1 -1
- fractal_server/app/runner/executors/slurm/_check_jobs_status.py +72 -0
- fractal_server/app/runner/{_slurm → executors/slurm}/_executor_wait_thread.py +3 -4
- fractal_server/app/runner/{_slurm → executors/slurm}/_slurm_config.py +3 -152
- fractal_server/app/runner/{_slurm → executors/slurm}/_subprocess_run_as_user.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/executor.py +9 -9
- fractal_server/app/runner/filenames.py +6 -0
- fractal_server/app/runner/set_start_and_last_task_index.py +39 -0
- fractal_server/app/runner/task_files.py +105 -0
- fractal_server/app/runner/{__init__.py → v1/__init__.py} +36 -49
- fractal_server/app/runner/{_common.py → v1/_common.py} +13 -120
- fractal_server/app/runner/{_local → v1/_local}/__init__.py +6 -6
- fractal_server/app/runner/{_local → v1/_local}/_local_config.py +6 -7
- fractal_server/app/runner/{_local → v1/_local}/_submit_setup.py +1 -5
- fractal_server/app/runner/v1/_slurm/__init__.py +310 -0
- fractal_server/app/runner/{_slurm → v1/_slurm}/_submit_setup.py +3 -9
- fractal_server/app/runner/v1/_slurm/get_slurm_config.py +163 -0
- fractal_server/app/runner/v1/common.py +117 -0
- fractal_server/app/runner/{handle_failed_job.py → v1/handle_failed_job.py} +8 -8
- fractal_server/app/runner/v2/__init__.py +337 -0
- fractal_server/app/runner/v2/_local/__init__.py +169 -0
- fractal_server/app/runner/v2/_local/_local_config.py +118 -0
- fractal_server/app/runner/v2/_local/_submit_setup.py +52 -0
- fractal_server/app/runner/v2/_slurm/__init__.py +157 -0
- fractal_server/app/runner/v2/_slurm/_submit_setup.py +83 -0
- fractal_server/app/runner/v2/_slurm/get_slurm_config.py +179 -0
- fractal_server/app/runner/v2/components.py +5 -0
- fractal_server/app/runner/v2/deduplicate_list.py +24 -0
- fractal_server/app/runner/v2/handle_failed_job.py +156 -0
- fractal_server/app/runner/v2/merge_outputs.py +41 -0
- fractal_server/app/runner/v2/runner.py +264 -0
- fractal_server/app/runner/v2/runner_functions.py +339 -0
- fractal_server/app/runner/v2/runner_functions_low_level.py +134 -0
- fractal_server/app/runner/v2/task_interface.py +43 -0
- fractal_server/app/runner/v2/v1_compat.py +21 -0
- fractal_server/app/schemas/__init__.py +4 -42
- fractal_server/app/schemas/v1/__init__.py +42 -0
- fractal_server/app/schemas/{applyworkflow.py → v1/applyworkflow.py} +18 -18
- fractal_server/app/schemas/{dataset.py → v1/dataset.py} +30 -30
- fractal_server/app/schemas/{dumps.py → v1/dumps.py} +8 -8
- fractal_server/app/schemas/{manifest.py → v1/manifest.py} +5 -5
- fractal_server/app/schemas/{project.py → v1/project.py} +9 -9
- fractal_server/app/schemas/{task.py → v1/task.py} +12 -12
- fractal_server/app/schemas/{task_collection.py → v1/task_collection.py} +7 -7
- fractal_server/app/schemas/{workflow.py → v1/workflow.py} +38 -38
- fractal_server/app/schemas/v2/__init__.py +34 -0
- fractal_server/app/schemas/v2/dataset.py +88 -0
- fractal_server/app/schemas/v2/dumps.py +87 -0
- fractal_server/app/schemas/v2/job.py +113 -0
- fractal_server/app/schemas/v2/manifest.py +109 -0
- fractal_server/app/schemas/v2/project.py +36 -0
- fractal_server/app/schemas/v2/task.py +121 -0
- fractal_server/app/schemas/v2/task_collection.py +105 -0
- fractal_server/app/schemas/v2/workflow.py +78 -0
- fractal_server/app/schemas/v2/workflowtask.py +118 -0
- fractal_server/config.py +5 -10
- fractal_server/images/__init__.py +50 -0
- fractal_server/images/tools.py +86 -0
- fractal_server/main.py +11 -3
- fractal_server/migrations/versions/4b35c5cefbe3_tmp_is_v2_compatible.py +39 -0
- fractal_server/migrations/versions/56af171b0159_v2.py +217 -0
- fractal_server/migrations/versions/876f28db9d4e_tmp_split_task_and_wftask_meta.py +68 -0
- fractal_server/migrations/versions/974c802f0dd0_tmp_workflowtaskv2_type_in_db.py +37 -0
- fractal_server/migrations/versions/9cd305cd6023_tmp_workflowtaskv2.py +40 -0
- fractal_server/migrations/versions/a6231ed6273c_tmp_args_schemas_in_taskv2.py +42 -0
- fractal_server/migrations/versions/b9e9eed9d442_tmp_taskv2_type.py +37 -0
- fractal_server/migrations/versions/e3e639454d4b_tmp_make_task_meta_non_optional.py +50 -0
- fractal_server/tasks/__init__.py +0 -5
- fractal_server/tasks/endpoint_operations.py +13 -19
- fractal_server/tasks/utils.py +35 -0
- fractal_server/tasks/{_TaskCollectPip.py → v1/_TaskCollectPip.py} +3 -3
- fractal_server/tasks/{background_operations.py → v1/background_operations.py} +18 -50
- fractal_server/tasks/v1/get_collection_data.py +14 -0
- fractal_server/tasks/v2/_TaskCollectPip.py +103 -0
- fractal_server/tasks/v2/background_operations.py +382 -0
- fractal_server/tasks/v2/get_collection_data.py +14 -0
- {fractal_server-1.4.9.dist-info → fractal_server-2.0.0a0.dist-info}/METADATA +3 -4
- fractal_server-2.0.0a0.dist-info/RECORD +166 -0
- fractal_server/app/runner/_slurm/.gitignore +0 -2
- fractal_server/app/runner/_slurm/__init__.py +0 -150
- fractal_server/app/runner/common.py +0 -311
- fractal_server-1.4.9.dist-info/RECORD +0 -97
- /fractal_server/app/runner/{_slurm → executors/slurm}/remote.py +0 -0
- {fractal_server-1.4.9.dist-info → fractal_server-2.0.0a0.dist-info}/LICENSE +0 -0
- {fractal_server-1.4.9.dist-info → fractal_server-2.0.0a0.dist-info}/WHEEL +0 -0
- {fractal_server-1.4.9.dist-info → fractal_server-2.0.0a0.dist-info}/entry_points.txt +0 -0
@@ -22,10 +22,9 @@ from pydantic import Extra
|
|
22
22
|
from pydantic import Field
|
23
23
|
from pydantic.error_wrappers import ValidationError
|
24
24
|
|
25
|
-
from
|
26
|
-
from
|
27
|
-
from
|
28
|
-
from ...models import WorkflowTask
|
25
|
+
from .....config import get_settings
|
26
|
+
from .....logger import set_logger
|
27
|
+
from .....syringe import Inject
|
29
28
|
|
30
29
|
logger = set_logger(__name__)
|
31
30
|
|
@@ -459,151 +458,3 @@ def get_default_slurm_config():
|
|
459
458
|
target_num_jobs=2,
|
460
459
|
max_num_jobs=4,
|
461
460
|
)
|
462
|
-
|
463
|
-
|
464
|
-
def get_slurm_config(
|
465
|
-
wftask: WorkflowTask,
|
466
|
-
workflow_dir: Path,
|
467
|
-
workflow_dir_user: Path,
|
468
|
-
config_path: Optional[Path] = None,
|
469
|
-
) -> SlurmConfig:
|
470
|
-
"""
|
471
|
-
Prepare a `SlurmConfig` configuration object
|
472
|
-
|
473
|
-
The sources for `SlurmConfig` attributes, in increasing priority order, are
|
474
|
-
|
475
|
-
1. The general content of the Fractal SLURM configuration file.
|
476
|
-
2. The GPU-specific content of the Fractal SLURM configuration file, if
|
477
|
-
appropriate.
|
478
|
-
3. Properties in `wftask.meta` (which, for `WorkflowTask`s added through
|
479
|
-
`Workflow.insert_task`, also includes `wftask.task.meta`);
|
480
|
-
|
481
|
-
Note: `wftask.meta` may be `None`.
|
482
|
-
|
483
|
-
Arguments:
|
484
|
-
wftask:
|
485
|
-
WorkflowTask for which the SLURM configuration is is to be
|
486
|
-
prepared.
|
487
|
-
workflow_dir:
|
488
|
-
Server-owned directory to store all task-execution-related relevant
|
489
|
-
files (inputs, outputs, errors, and all meta files related to the
|
490
|
-
job execution). Note: users cannot write directly to this folder.
|
491
|
-
workflow_dir_user:
|
492
|
-
User-side directory with the same scope as `workflow_dir`, and
|
493
|
-
where a user can write.
|
494
|
-
config_path:
|
495
|
-
Path of aFractal SLURM configuration file; if `None`, use
|
496
|
-
`FRACTAL_SLURM_CONFIG_FILE` variable from settings.
|
497
|
-
|
498
|
-
Returns:
|
499
|
-
slurm_config:
|
500
|
-
The SlurmConfig object
|
501
|
-
"""
|
502
|
-
|
503
|
-
logger.debug(
|
504
|
-
"[get_slurm_config] WorkflowTask meta attribute: {wftask.meta=}"
|
505
|
-
)
|
506
|
-
|
507
|
-
# Incorporate slurm_env.default_slurm_config
|
508
|
-
slurm_env = load_slurm_config_file(config_path=config_path)
|
509
|
-
slurm_dict = slurm_env.default_slurm_config.dict(
|
510
|
-
exclude_unset=True, exclude={"mem"}
|
511
|
-
)
|
512
|
-
if slurm_env.default_slurm_config.mem:
|
513
|
-
slurm_dict["mem_per_task_MB"] = slurm_env.default_slurm_config.mem
|
514
|
-
|
515
|
-
# Incorporate slurm_env.batching_config
|
516
|
-
for key, value in slurm_env.batching_config.dict().items():
|
517
|
-
slurm_dict[key] = value
|
518
|
-
|
519
|
-
# Incorporate slurm_env.user_local_exports
|
520
|
-
slurm_dict["user_local_exports"] = slurm_env.user_local_exports
|
521
|
-
|
522
|
-
logger.debug(
|
523
|
-
"[get_slurm_config] Fractal SLURM configuration file: "
|
524
|
-
f"{slurm_env.dict()=}"
|
525
|
-
)
|
526
|
-
|
527
|
-
# GPU-related options
|
528
|
-
# Notes about priority:
|
529
|
-
# 1. This block of definitions takes priority over other definitions from
|
530
|
-
# slurm_env which are not under the `needs_gpu` subgroup
|
531
|
-
# 2. This block of definitions has lower priority than whatever comes next
|
532
|
-
# (i.e. from WorkflowTask.meta).
|
533
|
-
if wftask.meta is not None:
|
534
|
-
needs_gpu = wftask.meta.get("needs_gpu", False)
|
535
|
-
else:
|
536
|
-
needs_gpu = False
|
537
|
-
logger.debug(f"[get_slurm_config] {needs_gpu=}")
|
538
|
-
if needs_gpu:
|
539
|
-
for key, value in slurm_env.gpu_slurm_config.dict(
|
540
|
-
exclude_unset=True, exclude={"mem"}
|
541
|
-
).items():
|
542
|
-
slurm_dict[key] = value
|
543
|
-
if slurm_env.gpu_slurm_config.mem:
|
544
|
-
slurm_dict["mem_per_task_MB"] = slurm_env.gpu_slurm_config.mem
|
545
|
-
|
546
|
-
# Number of CPUs per task, for multithreading
|
547
|
-
if wftask.meta is not None and "cpus_per_task" in wftask.meta:
|
548
|
-
cpus_per_task = int(wftask.meta["cpus_per_task"])
|
549
|
-
slurm_dict["cpus_per_task"] = cpus_per_task
|
550
|
-
|
551
|
-
# Required memory per task, in MB
|
552
|
-
if wftask.meta is not None and "mem" in wftask.meta:
|
553
|
-
raw_mem = wftask.meta["mem"]
|
554
|
-
mem_per_task_MB = _parse_mem_value(raw_mem)
|
555
|
-
slurm_dict["mem_per_task_MB"] = mem_per_task_MB
|
556
|
-
|
557
|
-
# Job name
|
558
|
-
job_name = wftask.task.name.replace(" ", "_")
|
559
|
-
slurm_dict["job_name"] = job_name
|
560
|
-
|
561
|
-
# Optional SLURM arguments and extra lines
|
562
|
-
if wftask.meta is not None:
|
563
|
-
account = wftask.meta.get("account", None)
|
564
|
-
if account is not None:
|
565
|
-
error_msg = (
|
566
|
-
f"Invalid {account=} property in WorkflowTask `meta` "
|
567
|
-
"attribute.\n"
|
568
|
-
"SLURM account must be set in the request body of the "
|
569
|
-
"apply-workflow endpoint, or by modifying the user properties."
|
570
|
-
)
|
571
|
-
logger.error(error_msg)
|
572
|
-
raise SlurmConfigError(error_msg)
|
573
|
-
for key in ["time", "gres", "constraint"]:
|
574
|
-
value = wftask.meta.get(key, None)
|
575
|
-
if value:
|
576
|
-
slurm_dict[key] = value
|
577
|
-
if wftask.meta is not None:
|
578
|
-
extra_lines = wftask.meta.get("extra_lines", [])
|
579
|
-
else:
|
580
|
-
extra_lines = []
|
581
|
-
extra_lines = slurm_dict.get("extra_lines", []) + extra_lines
|
582
|
-
if len(set(extra_lines)) != len(extra_lines):
|
583
|
-
logger.debug(
|
584
|
-
"[get_slurm_config] Removing repeated elements "
|
585
|
-
f"from {extra_lines=}."
|
586
|
-
)
|
587
|
-
extra_lines = list(set(extra_lines))
|
588
|
-
slurm_dict["extra_lines"] = extra_lines
|
589
|
-
|
590
|
-
# Job-batching parameters (if None, they will be determined heuristically)
|
591
|
-
if wftask.meta is not None:
|
592
|
-
tasks_per_job = wftask.meta.get("tasks_per_job", None)
|
593
|
-
parallel_tasks_per_job = wftask.meta.get(
|
594
|
-
"parallel_tasks_per_job", None
|
595
|
-
)
|
596
|
-
else:
|
597
|
-
tasks_per_job = None
|
598
|
-
parallel_tasks_per_job = None
|
599
|
-
slurm_dict["tasks_per_job"] = tasks_per_job
|
600
|
-
slurm_dict["parallel_tasks_per_job"] = parallel_tasks_per_job
|
601
|
-
|
602
|
-
# Put everything together
|
603
|
-
logger.debug(
|
604
|
-
"[get_slurm_config] Now create a SlurmConfig object based "
|
605
|
-
f"on {slurm_dict=}"
|
606
|
-
)
|
607
|
-
slurm_config = SlurmConfig(**slurm_dict)
|
608
|
-
|
609
|
-
return slurm_config
|
@@ -29,14 +29,14 @@ import cloudpickle
|
|
29
29
|
from cfut import SlurmExecutor
|
30
30
|
from cfut.util import random_string
|
31
31
|
|
32
|
-
from
|
33
|
-
from
|
34
|
-
from
|
35
|
-
from
|
36
|
-
from
|
37
|
-
from
|
38
|
-
from
|
39
|
-
from
|
32
|
+
from .....config import get_settings
|
33
|
+
from .....logger import set_logger
|
34
|
+
from .....syringe import Inject
|
35
|
+
from ...exceptions import JobExecutionError
|
36
|
+
from ...exceptions import TaskExecutionError
|
37
|
+
from ...filenames import SHUTDOWN_FILENAME
|
38
|
+
from ...task_files import get_task_file_paths
|
39
|
+
from ...task_files import TaskFiles
|
40
40
|
from ._batching import heuristics
|
41
41
|
from ._executor_wait_thread import FractalSlurmWaitThread
|
42
42
|
from ._slurm_config import get_default_slurm_config
|
@@ -1001,7 +1001,7 @@ class FractalSlurmExecutor(SlurmExecutor):
|
|
1001
1001
|
cmdlines.append(
|
1002
1002
|
(
|
1003
1003
|
f"{python_worker_interpreter}"
|
1004
|
-
" -m fractal_server.app.runner.
|
1004
|
+
" -m fractal_server.app.runner.executors.slurm.remote "
|
1005
1005
|
f"--input-file {input_pickle_file} "
|
1006
1006
|
f"--output-file {output_pickle_file}"
|
1007
1007
|
)
|
@@ -0,0 +1,39 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
3
|
+
|
4
|
+
def set_start_and_last_task_index(
    num_tasks: int,
    first_task_index: Optional[int] = None,
    last_task_index: Optional[int] = None,
) -> tuple[int, int]:
    """
    Handle `first_task_index` and `last_task_index`, by setting defaults and
    validating values.

    Arguments:
        num_tasks:
            Total number of tasks in a workflow task list
        first_task_index:
            Positional index of the first task to execute; if `None`, it is
            set to `0`.
        last_task_index:
            Positional index of the last task to execute; if `None`, it is
            set to `num_tasks - 1`.

    Returns:
        The `(first_task_index, last_task_index)` pair, with defaults applied.

    Raises:
        ValueError:
            If `num_tasks` is smaller than 1, if either index is negative, if
            `last_task_index` exceeds `num_tasks - 1`, or if
            `first_task_index` is larger than `last_task_index`.
    """
    # An empty task list admits no valid index range. Fail here with an
    # explicit message, rather than letting the default `last_task_index`
    # become -1 and be reported as a negative index that the caller never
    # provided.
    if num_tasks < 1:
        raise ValueError(f"{num_tasks=} must be at least 1")

    # Set default values
    if first_task_index is None:
        first_task_index = 0
    if last_task_index is None:
        last_task_index = num_tasks - 1

    # Perform checks
    if first_task_index < 0:
        raise ValueError(f"{first_task_index=} cannot be negative")
    if last_task_index < 0:
        raise ValueError(f"{last_task_index=} cannot be negative")
    if last_task_index > num_tasks - 1:
        raise ValueError(
            f"{last_task_index=} cannot be larger than {(num_tasks-1)=}"
        )
    # Together with the upper bound on `last_task_index`, this also keeps
    # `first_task_index` within the task list.
    if first_task_index > last_task_index:
        raise ValueError(
            f"{first_task_index=} cannot be larger than {last_task_index=}"
        )
    return (first_task_index, last_task_index)
|
@@ -0,0 +1,105 @@
|
|
1
|
+
from functools import lru_cache
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
|
6
|
+
# Translation table mapping each character that is replaced in a component
# string (space, slash, dot) to an underscore.
_COMPONENT_TRANSLATION_TABLE = str.maketrans({" ": "_", "/": "_", ".": "_"})


def sanitize_component(value: str) -> str:
    """
    Replace each of {" ", "/", "."} in a string with "_", e.g. going from
    'plate.zarr/B/03/0' to 'plate_zarr_B_03_0'.

    Arguments:
        value:
            The string to sanitize.

    Returns:
        A copy of `value` where every space, slash and dot has been replaced
        by an underscore; all other characters are left untouched.
    """
    # A single `translate` pass is equivalent to chaining one `replace` call
    # per character, since every target maps to the same "_" replacement.
    return value.translate(_COMPONENT_TRANSLATION_TABLE)
|
12
|
+
|
13
|
+
|
14
|
+
class TaskFiles:
    """
    Group all file paths pertaining to a task

    All task-related files share the same `file_prefix` and are located in
    `workflow_dir_user`.

    Attributes:
        workflow_dir:
            Server-owned directory to store all task-execution-related relevant
            files (inputs, outputs, errors, and all meta files related to the
            job execution). Note: users cannot write directly to this folder.
        workflow_dir_user:
            User-side directory with the same scope as `workflow_dir`, and
            where a user can write.
        task_order:
            Positional order of the task within a workflow.
        component:
            Specific component to run the task for (relevant for tasks that
            will be executed in parallel over many components).
        file_prefix:
            Prefix for all task-related files.
        args:
            Path for input json file.
        metadiff:
            Path for output json file with metadata update.
        out:
            Path for task-execution stdout.
        err:
            Path for task-execution stderr.
        log:
            Path for the task `.log` file.
    """

    workflow_dir: Path
    workflow_dir_user: Path
    task_order: Optional[int] = None
    component: Optional[str] = None  # FIXME: this is actually for V1 only

    file_prefix: str
    args: Path
    out: Path
    err: Path
    log: Path
    metadiff: Path

    def __init__(
        self,
        workflow_dir: Path,
        workflow_dir_user: Path,
        task_order: Optional[int] = None,
        component: Optional[str] = None,
    ):
        """
        Store the input arguments and derive all task-related file paths.

        Arguments:
            workflow_dir:
                Server-owned directory for the job (stored as-is).
            workflow_dir_user:
                User-side directory, under which all task files are placed.
            task_order:
                Positional order of the task; if `None`, the file prefix
                starts with the literal string `task`.
            component:
                Component the task runs for; if not `None`, its sanitized
                form is appended to the file prefix after `_par_`.
        """
        self.workflow_dir = workflow_dir
        self.workflow_dir_user = workflow_dir_user
        self.task_order = task_order
        self.component = component

        # Optional per-component suffix, made safe for use in a file name
        if self.component is None:
            component_suffix = ""
        else:
            component_safe = sanitize_component(str(self.component))
            component_suffix = f"_par_{component_safe}"

        # Leading part of the prefix: the task order, or a fixed placeholder
        order = "task" if self.task_order is None else str(self.task_order)
        self.file_prefix = f"{order}{component_suffix}"

        # All files live in the user-side folder and share the same prefix
        folder = self.workflow_dir_user
        prefix = self.file_prefix
        self.args = folder / f"{prefix}.args.json"
        self.out = folder / f"{prefix}.out"
        self.err = folder / f"{prefix}.err"
        self.log = folder / f"{prefix}.log"
        self.metadiff = folder / f"{prefix}.metadiff.json"

    def __repr__(self) -> str:
        """
        Return a debugging representation based on the constructor arguments.
        """
        return (
            f"{type(self).__name__}("
            f"workflow_dir={self.workflow_dir!r}, "
            f"workflow_dir_user={self.workflow_dir_user!r}, "
            f"task_order={self.task_order!r}, "
            f"component={self.component!r})"
        )
|
85
|
+
|
86
|
+
|
87
|
+
@lru_cache(maxsize=128)
def get_task_file_paths(
    workflow_dir: Path,
    workflow_dir_user: Path,
    task_order: Optional[int] = None,
    component: Optional[str] = None,
) -> TaskFiles:
    """
    Return the corresponding TaskFiles object

    Results are memoized on the four arguments (which therefore need to be
    hashable), so that repeated calls with equal arguments can re-use an
    already-built `TaskFiles` instance rather than instantiating a new one.

    Arguments:
        workflow_dir:
            Forwarded to `TaskFiles` as `workflow_dir`.
        workflow_dir_user:
            Forwarded to `TaskFiles` as `workflow_dir_user`.
        task_order:
            Forwarded to `TaskFiles` as `task_order`.
        component:
            Forwarded to `TaskFiles` as `component`.

    Returns:
        The `TaskFiles` object built from the given arguments.
    """
    task_files = TaskFiles(
        workflow_dir=workflow_dir,
        workflow_dir_user=workflow_dir_user,
        task_order=task_order,
        component=component,
    )
    return task_files
|
@@ -22,52 +22,31 @@ import traceback
|
|
22
22
|
from pathlib import Path
|
23
23
|
from typing import Optional
|
24
24
|
|
25
|
-
from
|
26
|
-
from
|
27
|
-
from
|
28
|
-
from ...
|
29
|
-
from ...
|
30
|
-
from
|
31
|
-
from
|
32
|
-
from
|
33
|
-
from
|
34
|
-
from ..
|
35
|
-
from ..
|
36
|
-
from
|
37
|
-
from ._local import process_workflow as local_process_workflow
|
25
|
+
from ....logger import set_logger
|
26
|
+
from ....syringe import Inject
|
27
|
+
from ....utils import get_timestamp
|
28
|
+
from ...db import DB
|
29
|
+
from ...models import ApplyWorkflow
|
30
|
+
from ...models import Dataset
|
31
|
+
from ...models import Workflow
|
32
|
+
from ...models import WorkflowTask
|
33
|
+
from ...schemas.v1 import JobStatusTypeV1
|
34
|
+
from ..exceptions import JobExecutionError
|
35
|
+
from ..exceptions import TaskExecutionError
|
36
|
+
from ..filenames import WORKFLOW_LOG_FILENAME
|
37
|
+
from ..v1._local import process_workflow as local_process_workflow
|
38
|
+
from ..v1._slurm import process_workflow as slurm_process_workflow
|
38
39
|
from .common import close_job_logger
|
39
|
-
from .common import JobExecutionError
|
40
|
-
from .common import TaskExecutionError
|
41
40
|
from .common import validate_workflow_compatibility # noqa: F401
|
42
41
|
from .handle_failed_job import assemble_history_failed_job
|
43
42
|
from .handle_failed_job import assemble_meta_failed_job
|
43
|
+
from fractal_server import __VERSION__
|
44
|
+
from fractal_server.config import get_settings
|
44
45
|
|
45
46
|
|
46
47
|
_backends = {}
|
47
|
-
_backend_errors: dict[str, Exception] = {}
|
48
48
|
_backends["local"] = local_process_workflow
|
49
|
-
|
50
|
-
try:
|
51
|
-
from ._slurm import process_workflow as slurm_process_workflow
|
52
|
-
|
53
|
-
_backends["slurm"] = slurm_process_workflow
|
54
|
-
except ModuleNotFoundError as e:
|
55
|
-
_backend_errors["slurm"] = e
|
56
|
-
|
57
|
-
|
58
|
-
def get_process_workflow():
|
59
|
-
settings = Inject(get_settings)
|
60
|
-
try:
|
61
|
-
process_workflow = _backends[settings.FRACTAL_RUNNER_BACKEND]
|
62
|
-
except KeyError:
|
63
|
-
raise _backend_errors.get(
|
64
|
-
settings.FRACTAL_RUNNER_BACKEND,
|
65
|
-
RuntimeError(
|
66
|
-
"Unknown error during collection of backend "
|
67
|
-
f"`{settings.FRACTAL_RUNNER_BACKEND}`"
|
68
|
-
),
|
69
|
-
)
|
70
|
-
return process_workflow
|
49
|
+
_backends["slurm"] = slurm_process_workflow
|
71
50
|
|
72
51
|
|
73
52
|
async def submit_workflow(
|
@@ -107,6 +86,17 @@ async def submit_workflow(
|
|
107
86
|
The username to impersonate for the workflow execution, for the
|
108
87
|
slurm backend.
|
109
88
|
"""
|
89
|
+
|
90
|
+
# Declare runner backend and set `process_workflow` function
|
91
|
+
settings = Inject(get_settings)
|
92
|
+
FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
|
93
|
+
if FRACTAL_RUNNER_BACKEND == "local":
|
94
|
+
process_workflow = local_process_workflow
|
95
|
+
elif FRACTAL_RUNNER_BACKEND == "slurm":
|
96
|
+
process_workflow = slurm_process_workflow
|
97
|
+
else:
|
98
|
+
raise RuntimeError(f"Invalid runner backend {FRACTAL_RUNNER_BACKEND=}")
|
99
|
+
|
110
100
|
with next(DB.get_sync_db()) as db_sync:
|
111
101
|
|
112
102
|
job: ApplyWorkflow = db_sync.get(ApplyWorkflow, job_id)
|
@@ -132,7 +122,7 @@ async def submit_workflow(
|
|
132
122
|
log_msg += (
|
133
123
|
f"Cannot fetch workflow {workflow_id} from database\n"
|
134
124
|
)
|
135
|
-
job.status =
|
125
|
+
job.status = JobStatusTypeV1.FAILED
|
136
126
|
job.end_timestamp = get_timestamp()
|
137
127
|
job.log = log_msg
|
138
128
|
db_sync.merge(job)
|
@@ -140,11 +130,6 @@ async def submit_workflow(
|
|
140
130
|
db_sync.close()
|
141
131
|
return
|
142
132
|
|
143
|
-
# Select backend
|
144
|
-
settings = Inject(get_settings)
|
145
|
-
FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
|
146
|
-
process_workflow = get_process_workflow()
|
147
|
-
|
148
133
|
# Prepare some of process_workflow arguments
|
149
134
|
input_paths = input_dataset.paths
|
150
135
|
output_path = output_dataset.paths[0]
|
@@ -173,7 +158,9 @@ async def submit_workflow(
|
|
173
158
|
WORKFLOW_DIR_USER = WORKFLOW_DIR
|
174
159
|
elif FRACTAL_RUNNER_BACKEND == "slurm":
|
175
160
|
|
176
|
-
from .
|
161
|
+
from ..executors.slurm._subprocess_run_as_user import (
|
162
|
+
_mkdir_as_user,
|
163
|
+
)
|
177
164
|
|
178
165
|
WORKFLOW_DIR_USER = (
|
179
166
|
Path(user_cache_dir) / f"{WORKFLOW_DIR.name}"
|
@@ -274,7 +261,7 @@ async def submit_workflow(
|
|
274
261
|
db_sync.merge(output_dataset)
|
275
262
|
|
276
263
|
# Update job DB entry
|
277
|
-
job.status =
|
264
|
+
job.status = JobStatusTypeV1.DONE
|
278
265
|
job.end_timestamp = get_timestamp()
|
279
266
|
with log_file_path.open("r") as f:
|
280
267
|
logs = f.read()
|
@@ -304,7 +291,7 @@ async def submit_workflow(
|
|
304
291
|
|
305
292
|
db_sync.merge(output_dataset)
|
306
293
|
|
307
|
-
job.status =
|
294
|
+
job.status = JobStatusTypeV1.FAILED
|
308
295
|
job.end_timestamp = get_timestamp()
|
309
296
|
|
310
297
|
exception_args_string = "\n".join(e.args)
|
@@ -337,7 +324,7 @@ async def submit_workflow(
|
|
337
324
|
|
338
325
|
db_sync.merge(output_dataset)
|
339
326
|
|
340
|
-
job.status =
|
327
|
+
job.status = JobStatusTypeV1.FAILED
|
341
328
|
job.end_timestamp = get_timestamp()
|
342
329
|
error = e.assemble_error()
|
343
330
|
job.log = f"JOB ERROR in Fractal job {job.id}:\nTRACEBACK:\n{error}"
|
@@ -366,7 +353,7 @@ async def submit_workflow(
|
|
366
353
|
|
367
354
|
db_sync.merge(output_dataset)
|
368
355
|
|
369
|
-
job.status =
|
356
|
+
job.status = JobStatusTypeV1.FAILED
|
370
357
|
job.end_timestamp = get_timestamp()
|
371
358
|
job.log = (
|
372
359
|
f"UNKNOWN ERROR in Fractal job {job.id}\n"
|