fractal-server 1.4.9__py3-none-any.whl → 2.0.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. fractal_server/__init__.py +1 -1
  2. fractal_server/app/models/__init__.py +4 -7
  3. fractal_server/app/models/linkuserproject.py +9 -0
  4. fractal_server/app/models/security.py +6 -0
  5. fractal_server/app/models/state.py +1 -1
  6. fractal_server/app/models/v1/__init__.py +10 -0
  7. fractal_server/app/models/{dataset.py → v1/dataset.py} +5 -5
  8. fractal_server/app/models/{job.py → v1/job.py} +5 -5
  9. fractal_server/app/models/{project.py → v1/project.py} +5 -5
  10. fractal_server/app/models/{task.py → v1/task.py} +7 -2
  11. fractal_server/app/models/{workflow.py → v1/workflow.py} +5 -5
  12. fractal_server/app/models/v2/__init__.py +20 -0
  13. fractal_server/app/models/v2/dataset.py +55 -0
  14. fractal_server/app/models/v2/job.py +51 -0
  15. fractal_server/app/models/v2/project.py +31 -0
  16. fractal_server/app/models/v2/task.py +93 -0
  17. fractal_server/app/models/v2/workflow.py +43 -0
  18. fractal_server/app/models/v2/workflowtask.py +90 -0
  19. fractal_server/app/routes/{admin.py → admin/v1.py} +42 -42
  20. fractal_server/app/routes/admin/v2.py +275 -0
  21. fractal_server/app/routes/api/v1/__init__.py +7 -7
  22. fractal_server/app/routes/api/v1/_aux_functions.py +2 -2
  23. fractal_server/app/routes/api/v1/dataset.py +44 -37
  24. fractal_server/app/routes/api/v1/job.py +12 -12
  25. fractal_server/app/routes/api/v1/project.py +23 -21
  26. fractal_server/app/routes/api/v1/task.py +24 -14
  27. fractal_server/app/routes/api/v1/task_collection.py +16 -14
  28. fractal_server/app/routes/api/v1/workflow.py +24 -24
  29. fractal_server/app/routes/api/v1/workflowtask.py +10 -10
  30. fractal_server/app/routes/api/v2/__init__.py +28 -0
  31. fractal_server/app/routes/api/v2/_aux_functions.py +497 -0
  32. fractal_server/app/routes/api/v2/apply.py +220 -0
  33. fractal_server/app/routes/api/v2/dataset.py +310 -0
  34. fractal_server/app/routes/api/v2/images.py +212 -0
  35. fractal_server/app/routes/api/v2/job.py +200 -0
  36. fractal_server/app/routes/api/v2/project.py +205 -0
  37. fractal_server/app/routes/api/v2/task.py +222 -0
  38. fractal_server/app/routes/api/v2/task_collection.py +229 -0
  39. fractal_server/app/routes/api/v2/workflow.py +398 -0
  40. fractal_server/app/routes/api/v2/workflowtask.py +269 -0
  41. fractal_server/app/routes/aux/_job.py +1 -1
  42. fractal_server/app/runner/async_wrap.py +27 -0
  43. fractal_server/app/runner/exceptions.py +129 -0
  44. fractal_server/app/runner/executors/local/__init__.py +3 -0
  45. fractal_server/app/runner/{_local → executors/local}/executor.py +2 -2
  46. fractal_server/app/runner/executors/slurm/__init__.py +3 -0
  47. fractal_server/app/runner/{_slurm → executors/slurm}/_batching.py +1 -1
  48. fractal_server/app/runner/executors/slurm/_check_jobs_status.py +72 -0
  49. fractal_server/app/runner/{_slurm → executors/slurm}/_executor_wait_thread.py +3 -4
  50. fractal_server/app/runner/{_slurm → executors/slurm}/_slurm_config.py +3 -152
  51. fractal_server/app/runner/{_slurm → executors/slurm}/_subprocess_run_as_user.py +1 -1
  52. fractal_server/app/runner/{_slurm → executors/slurm}/executor.py +9 -9
  53. fractal_server/app/runner/filenames.py +6 -0
  54. fractal_server/app/runner/set_start_and_last_task_index.py +39 -0
  55. fractal_server/app/runner/task_files.py +105 -0
  56. fractal_server/app/runner/{__init__.py → v1/__init__.py} +36 -49
  57. fractal_server/app/runner/{_common.py → v1/_common.py} +13 -120
  58. fractal_server/app/runner/{_local → v1/_local}/__init__.py +6 -6
  59. fractal_server/app/runner/{_local → v1/_local}/_local_config.py +6 -7
  60. fractal_server/app/runner/{_local → v1/_local}/_submit_setup.py +1 -5
  61. fractal_server/app/runner/v1/_slurm/__init__.py +310 -0
  62. fractal_server/app/runner/{_slurm → v1/_slurm}/_submit_setup.py +3 -9
  63. fractal_server/app/runner/v1/_slurm/get_slurm_config.py +163 -0
  64. fractal_server/app/runner/v1/common.py +117 -0
  65. fractal_server/app/runner/{handle_failed_job.py → v1/handle_failed_job.py} +8 -8
  66. fractal_server/app/runner/v2/__init__.py +337 -0
  67. fractal_server/app/runner/v2/_local/__init__.py +169 -0
  68. fractal_server/app/runner/v2/_local/_local_config.py +118 -0
  69. fractal_server/app/runner/v2/_local/_submit_setup.py +52 -0
  70. fractal_server/app/runner/v2/_slurm/__init__.py +157 -0
  71. fractal_server/app/runner/v2/_slurm/_submit_setup.py +83 -0
  72. fractal_server/app/runner/v2/_slurm/get_slurm_config.py +179 -0
  73. fractal_server/app/runner/v2/components.py +5 -0
  74. fractal_server/app/runner/v2/deduplicate_list.py +24 -0
  75. fractal_server/app/runner/v2/handle_failed_job.py +156 -0
  76. fractal_server/app/runner/v2/merge_outputs.py +41 -0
  77. fractal_server/app/runner/v2/runner.py +264 -0
  78. fractal_server/app/runner/v2/runner_functions.py +339 -0
  79. fractal_server/app/runner/v2/runner_functions_low_level.py +134 -0
  80. fractal_server/app/runner/v2/task_interface.py +43 -0
  81. fractal_server/app/runner/v2/v1_compat.py +21 -0
  82. fractal_server/app/schemas/__init__.py +4 -42
  83. fractal_server/app/schemas/v1/__init__.py +42 -0
  84. fractal_server/app/schemas/{applyworkflow.py → v1/applyworkflow.py} +18 -18
  85. fractal_server/app/schemas/{dataset.py → v1/dataset.py} +30 -30
  86. fractal_server/app/schemas/{dumps.py → v1/dumps.py} +8 -8
  87. fractal_server/app/schemas/{manifest.py → v1/manifest.py} +5 -5
  88. fractal_server/app/schemas/{project.py → v1/project.py} +9 -9
  89. fractal_server/app/schemas/{task.py → v1/task.py} +12 -12
  90. fractal_server/app/schemas/{task_collection.py → v1/task_collection.py} +7 -7
  91. fractal_server/app/schemas/{workflow.py → v1/workflow.py} +38 -38
  92. fractal_server/app/schemas/v2/__init__.py +34 -0
  93. fractal_server/app/schemas/v2/dataset.py +88 -0
  94. fractal_server/app/schemas/v2/dumps.py +87 -0
  95. fractal_server/app/schemas/v2/job.py +113 -0
  96. fractal_server/app/schemas/v2/manifest.py +109 -0
  97. fractal_server/app/schemas/v2/project.py +36 -0
  98. fractal_server/app/schemas/v2/task.py +121 -0
  99. fractal_server/app/schemas/v2/task_collection.py +105 -0
  100. fractal_server/app/schemas/v2/workflow.py +78 -0
  101. fractal_server/app/schemas/v2/workflowtask.py +118 -0
  102. fractal_server/config.py +5 -10
  103. fractal_server/images/__init__.py +50 -0
  104. fractal_server/images/tools.py +86 -0
  105. fractal_server/main.py +11 -3
  106. fractal_server/migrations/versions/4b35c5cefbe3_tmp_is_v2_compatible.py +39 -0
  107. fractal_server/migrations/versions/56af171b0159_v2.py +217 -0
  108. fractal_server/migrations/versions/876f28db9d4e_tmp_split_task_and_wftask_meta.py +68 -0
  109. fractal_server/migrations/versions/974c802f0dd0_tmp_workflowtaskv2_type_in_db.py +37 -0
  110. fractal_server/migrations/versions/9cd305cd6023_tmp_workflowtaskv2.py +40 -0
  111. fractal_server/migrations/versions/a6231ed6273c_tmp_args_schemas_in_taskv2.py +42 -0
  112. fractal_server/migrations/versions/b9e9eed9d442_tmp_taskv2_type.py +37 -0
  113. fractal_server/migrations/versions/e3e639454d4b_tmp_make_task_meta_non_optional.py +50 -0
  114. fractal_server/tasks/__init__.py +0 -5
  115. fractal_server/tasks/endpoint_operations.py +13 -19
  116. fractal_server/tasks/utils.py +35 -0
  117. fractal_server/tasks/{_TaskCollectPip.py → v1/_TaskCollectPip.py} +3 -3
  118. fractal_server/tasks/{background_operations.py → v1/background_operations.py} +18 -50
  119. fractal_server/tasks/v1/get_collection_data.py +14 -0
  120. fractal_server/tasks/v2/_TaskCollectPip.py +103 -0
  121. fractal_server/tasks/v2/background_operations.py +382 -0
  122. fractal_server/tasks/v2/get_collection_data.py +14 -0
  123. {fractal_server-1.4.9.dist-info → fractal_server-2.0.0a0.dist-info}/METADATA +3 -4
  124. fractal_server-2.0.0a0.dist-info/RECORD +166 -0
  125. fractal_server/app/runner/_slurm/.gitignore +0 -2
  126. fractal_server/app/runner/_slurm/__init__.py +0 -150
  127. fractal_server/app/runner/common.py +0 -311
  128. fractal_server-1.4.9.dist-info/RECORD +0 -97
  129. /fractal_server/app/runner/{_slurm → executors/slurm}/remote.py +0 -0
  130. {fractal_server-1.4.9.dist-info → fractal_server-2.0.0a0.dist-info}/LICENSE +0 -0
  131. {fractal_server-1.4.9.dist-info → fractal_server-2.0.0a0.dist-info}/WHEEL +0 -0
  132. {fractal_server-1.4.9.dist-info → fractal_server-2.0.0a0.dist-info}/entry_points.txt +0 -0
fractal_server/app/runner/v1/_slurm/get_slurm_config.py (new file)
@@ -0,0 +1,163 @@
+ from pathlib import Path
+ from typing import Optional
+
+ from fractal_server.app.models.v1 import WorkflowTask
+ from fractal_server.app.runner.executors.slurm._slurm_config import (
+     _parse_mem_value,
+ )
+ from fractal_server.app.runner.executors.slurm._slurm_config import (
+     load_slurm_config_file,
+ )
+ from fractal_server.app.runner.executors.slurm._slurm_config import logger
+ from fractal_server.app.runner.executors.slurm._slurm_config import SlurmConfig
+ from fractal_server.app.runner.executors.slurm._slurm_config import (
+     SlurmConfigError,
+ )
+
+
+ def get_slurm_config(
+     wftask: WorkflowTask,
+     workflow_dir: Path,
+     workflow_dir_user: Path,
+     config_path: Optional[Path] = None,
+ ) -> SlurmConfig:
+     """
+     Prepare a `SlurmConfig` configuration object
+
+     The sources for `SlurmConfig` attributes, in increasing priority order, are
+
+     1. The general content of the Fractal SLURM configuration file.
+     2. The GPU-specific content of the Fractal SLURM configuration file, if
+        appropriate.
+     3. Properties in `wftask.meta` (which, for `WorkflowTask`s added through
+        `Workflow.insert_task`, also includes `wftask.task.meta`);
+
+     Note: `wftask.meta` may be `None`.
+
+     Arguments:
+         wftask:
+             WorkflowTask for which the SLURM configuration is to be
+             prepared.
+         workflow_dir:
+             Server-owned directory to store all task-execution-related relevant
+             files (inputs, outputs, errors, and all meta files related to the
+             job execution). Note: users cannot write directly to this folder.
+         workflow_dir_user:
+             User-side directory with the same scope as `workflow_dir`, and
+             where a user can write.
+         config_path:
+             Path of a Fractal SLURM configuration file; if `None`, use
+             `FRACTAL_SLURM_CONFIG_FILE` variable from settings.
+
+     Returns:
+         slurm_config:
+             The SlurmConfig object
+     """
+
+     logger.debug(
+         "[get_slurm_config] WorkflowTask meta attribute: {wftask.meta=}"
+     )
+
+     # Incorporate slurm_env.default_slurm_config
+     slurm_env = load_slurm_config_file(config_path=config_path)
+     slurm_dict = slurm_env.default_slurm_config.dict(
+         exclude_unset=True, exclude={"mem"}
+     )
+     if slurm_env.default_slurm_config.mem:
+         slurm_dict["mem_per_task_MB"] = slurm_env.default_slurm_config.mem
+
+     # Incorporate slurm_env.batching_config
+     for key, value in slurm_env.batching_config.dict().items():
+         slurm_dict[key] = value
+
+     # Incorporate slurm_env.user_local_exports
+     slurm_dict["user_local_exports"] = slurm_env.user_local_exports
+
+     logger.debug(
+         "[get_slurm_config] Fractal SLURM configuration file: "
+         f"{slurm_env.dict()=}"
+     )
+
+     # GPU-related options
+     # Notes about priority:
+     # 1. This block of definitions takes priority over other definitions from
+     #    slurm_env which are not under the `needs_gpu` subgroup
+     # 2. This block of definitions has lower priority than whatever comes next
+     #    (i.e. from WorkflowTask.meta).
+     if wftask.meta is not None:
+         needs_gpu = wftask.meta.get("needs_gpu", False)
+     else:
+         needs_gpu = False
+     logger.debug(f"[get_slurm_config] {needs_gpu=}")
+     if needs_gpu:
+         for key, value in slurm_env.gpu_slurm_config.dict(
+             exclude_unset=True, exclude={"mem"}
+         ).items():
+             slurm_dict[key] = value
+         if slurm_env.gpu_slurm_config.mem:
+             slurm_dict["mem_per_task_MB"] = slurm_env.gpu_slurm_config.mem
+
+     # Number of CPUs per task, for multithreading
+     if wftask.meta is not None and "cpus_per_task" in wftask.meta:
+         cpus_per_task = int(wftask.meta["cpus_per_task"])
+         slurm_dict["cpus_per_task"] = cpus_per_task
+
+     # Required memory per task, in MB
+     if wftask.meta is not None and "mem" in wftask.meta:
+         raw_mem = wftask.meta["mem"]
+         mem_per_task_MB = _parse_mem_value(raw_mem)
+         slurm_dict["mem_per_task_MB"] = mem_per_task_MB
+
+     # Job name
+     job_name = wftask.task.name.replace(" ", "_")
+     slurm_dict["job_name"] = job_name
+
+     # Optional SLURM arguments and extra lines
+     if wftask.meta is not None:
+         account = wftask.meta.get("account", None)
+         if account is not None:
+             error_msg = (
+                 f"Invalid {account=} property in WorkflowTask `meta` "
+                 "attribute.\n"
+                 "SLURM account must be set in the request body of the "
+                 "apply-workflow endpoint, or by modifying the user properties."
+             )
+             logger.error(error_msg)
+             raise SlurmConfigError(error_msg)
+         for key in ["time", "gres", "constraint"]:
+             value = wftask.meta.get(key, None)
+             if value:
+                 slurm_dict[key] = value
+     if wftask.meta is not None:
+         extra_lines = wftask.meta.get("extra_lines", [])
+     else:
+         extra_lines = []
+     extra_lines = slurm_dict.get("extra_lines", []) + extra_lines
+     if len(set(extra_lines)) != len(extra_lines):
+         logger.debug(
+             "[get_slurm_config] Removing repeated elements "
+             f"from {extra_lines=}."
+         )
+         extra_lines = list(set(extra_lines))
+     slurm_dict["extra_lines"] = extra_lines
+
+     # Job-batching parameters (if None, they will be determined heuristically)
+     if wftask.meta is not None:
+         tasks_per_job = wftask.meta.get("tasks_per_job", None)
+         parallel_tasks_per_job = wftask.meta.get(
+             "parallel_tasks_per_job", None
+         )
+     else:
+         tasks_per_job = None
+         parallel_tasks_per_job = None
+     slurm_dict["tasks_per_job"] = tasks_per_job
+     slurm_dict["parallel_tasks_per_job"] = parallel_tasks_per_job
+
+     # Put everything together
+     logger.debug(
+         "[get_slurm_config] Now create a SlurmConfig object based "
+         f"on {slurm_dict=}"
+     )
+     slurm_config = SlurmConfig(**slurm_dict)
+
+     return slurm_config
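Note (not part of the diff): the docstring above describes a three-level priority merge, where the GPU-specific file section overrides the general section and `wftask.meta` overrides both. A minimal, self-contained sketch of that ordering only, with hypothetical dictionaries standing in for the three sources (no fractal-server objects involved):

    # Hypothetical configuration sources, in increasing priority order.
    default_cfg = {"partition": "main", "cpus_per_task": 1, "mem_per_task_MB": 4000}
    gpu_cfg = {"partition": "gpu", "gres": "gpu:1"}  # applied only when needs_gpu
    wftask_meta = {"needs_gpu": True, "cpus_per_task": 8}  # highest priority

    merged = dict(default_cfg)
    if wftask_meta.get("needs_gpu", False):
        merged.update(gpu_cfg)
    if "cpus_per_task" in wftask_meta:
        merged["cpus_per_task"] = int(wftask_meta["cpus_per_task"])

    print(merged)
    # {'partition': 'gpu', 'cpus_per_task': 8, 'mem_per_task_MB': 4000, 'gres': 'gpu:1'}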
fractal_server/app/runner/v1/common.py (new file)
@@ -0,0 +1,117 @@
+ """
+ Common utilities and routines for runner backends (public API)
+
+ This module includes utilities and routines that are of use to implement
+ runner backends but that should also be exposed to the other components of
+ `Fractal Server`.
+ """
+ import json
+ from json import JSONEncoder
+ from pathlib import Path
+ from typing import Any
+
+ from pydantic import BaseModel
+
+ from ....logger import close_logger as close_job_logger  # noqa F401
+ from ...models.v1 import Dataset
+ from ...models.v1 import Workflow
+
+
+ class TaskParameterEncoder(JSONEncoder):
+     """
+     Convenience JSONEncoder that serialises `Path`s as strings
+     """
+
+     def default(self, value):
+         if isinstance(value, Path):
+             return value.as_posix()
+         return JSONEncoder.default(self, value)
+
+
+ class TaskParameters(BaseModel):
+     """
+     Wrapper for task input parameters
+
+     Instances of this class are used to pass parameters from the output of a
+     task to the input of the next one.
+
+     Attributes:
+         input_paths:
+             Input paths as derived by the input dataset.
+         output_path:
+             Output path as derived from the output dataset.
+         metadata:
+             Dataset metadata, as found in the input dataset or as updated by
+             the previous task.
+         history:
+             Dataset history, as found in the input dataset or as updated by
+             the previous task.
+     """
+
+     input_paths: list[Path]
+     output_path: Path
+     metadata: dict[str, Any]
+     history: list[dict[str, Any]]
+
+     class Config:
+         arbitrary_types_allowed = True
+         extra = "forbid"
+
+
+ def validate_workflow_compatibility(
+     *,
+     input_dataset: Dataset,
+     workflow: Workflow,
+     output_dataset: Dataset,
+     first_task_index: int,
+     last_task_index: int,
+ ) -> None:
+     """
+     Check compatibility of workflow and input / output dataset
+     """
+     # Check input_dataset type
+     workflow_input_type = workflow.task_list[first_task_index].task.input_type
+     if (
+         workflow_input_type != "Any"
+         and workflow_input_type != input_dataset.type
+     ):
+         raise TypeError(
+             f"Incompatible types `{workflow_input_type}` of workflow "
+             f"`{workflow.name}` and `{input_dataset.type}` of dataset "
+             f"`{input_dataset.name}`"
+         )
+
+     # Check output_dataset type
+     workflow_output_type = workflow.task_list[last_task_index].task.output_type
+     if (
+         workflow_output_type != "Any"
+         and workflow_output_type != output_dataset.type
+     ):
+         raise TypeError(
+             f"Incompatible types `{workflow_output_type}` of workflow "
+             f"`{workflow.name}` and `{output_dataset.type}` of dataset "
+             f"`{output_dataset.name}`"
+         )
+
+
+ def write_args_file(
+     *args: dict[str, Any],
+     path: Path,
+ ):
+     """
+     Merge arbitrary dictionaries and write to file
+
+     Args:
+         *args:
+             One or more dictionaries that will be merged into one respecting
+             the order with which they are passed in, i.e., last in overrides
+             previous ones.
+         path:
+             Destination for serialised file.
+     """
+     out = {}
+     for d in args:
+         out.update(d)
+
+     with open(path, "w") as f:
+         json.dump(out, f, cls=TaskParameterEncoder, indent=4)
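Note (not part of the diff): `TaskParameterEncoder` and `write_args_file` together implement a small pattern, namely merging dictionaries in order (last one wins) and serialising the result with `Path` values rendered as strings. A standalone sketch of the same pattern, with hypothetical names (`PathEncoder`, `merge_and_dump`, `zarr_url`) that are not part of fractal-server:

    import json
    from json import JSONEncoder
    from pathlib import Path


    class PathEncoder(JSONEncoder):
        # Serialise Path objects as POSIX strings, as TaskParameterEncoder does above.
        def default(self, value):
            if isinstance(value, Path):
                return value.as_posix()
            return super().default(value)


    def merge_and_dump(*dicts, path: Path):
        # Later dictionaries override earlier ones, as in write_args_file.
        out = {}
        for d in dicts:
            out.update(d)
        with open(path, "w") as f:
            json.dump(out, f, cls=PathEncoder, indent=4)


    merge_and_dump(
        {"zarr_url": Path("/tmp/plate.zarr"), "level": 0},
        {"level": 1},  # overrides the previous "level"
        path=Path("/tmp/args.json"),
    )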
fractal_server/app/runner/{handle_failed_job.py → v1/handle_failed_job.py}
@@ -19,13 +19,13 @@ from pathlib import Path
  from typing import Any
  from typing import Optional

- from ..models import ApplyWorkflow
- from ..models import Dataset
- from ..models import Workflow
- from ..models import WorkflowTask
- from ..models import WorkflowTaskStatusType
- from ._common import HISTORY_FILENAME
- from ._common import METADATA_FILENAME
+ from ...models.v1 import ApplyWorkflow
+ from ...models.v1 import Dataset
+ from ...models.v1 import Workflow
+ from ...models.v1 import WorkflowTask
+ from ...schemas.v1 import WorkflowTaskStatusTypeV1
+ from ..filenames import HISTORY_FILENAME
+ from ..filenames import METADATA_FILENAME


  def assemble_history_failed_job(
@@ -98,7 +98,7 @@ def assemble_history_failed_job(
          failed_wftask_dump["task"] = failed_wftask.task.model_dump()
          new_history_item = dict(
              workflowtask=failed_wftask_dump,
-             status=WorkflowTaskStatusType.FAILED,
+             status=WorkflowTaskStatusTypeV1.FAILED,
              parallelization=dict(
                  parallelization_level=failed_wftask.parallelization_level,
              ),
fractal_server/app/runner/v2/__init__.py (new file)
@@ -0,0 +1,337 @@
+ """
+ Runner backend subsystem root V2
+
+ This module is the single entry point to the runner backend subsystem V2.
+ Other subsystems should only import this module and not its submodules or
+ the individual backends.
+ """
+ import os
+ import traceback
+ from pathlib import Path
+ from typing import Optional
+
+ from sqlalchemy.orm.attributes import flag_modified
+
+ from ....config import get_settings
+ from ....logger import close_logger
+ from ....logger import set_logger
+ from ....syringe import Inject
+ from ....utils import get_timestamp
+ from ...db import DB
+ from ...models.v2 import DatasetV2
+ from ...models.v2 import JobV2
+ from ...models.v2 import WorkflowTaskV2
+ from ...models.v2 import WorkflowV2
+ from ...schemas.v2 import JobStatusTypeV2
+ from ..exceptions import JobExecutionError
+ from ..exceptions import TaskExecutionError
+ from ..filenames import WORKFLOW_LOG_FILENAME
+ from ._local import process_workflow as local_process_workflow
+ from ._slurm import process_workflow as slurm_process_workflow
+ from .handle_failed_job import assemble_filters_failed_job
+ from .handle_failed_job import assemble_history_failed_job
+ from .handle_failed_job import assemble_images_failed_job
+ from .runner import execute_tasks_v2  # noqa
+ from fractal_server import __VERSION__
+
+ _backends = {}
+ _backends["local"] = local_process_workflow
+ _backends["slurm"] = slurm_process_workflow
+
+
+ async def submit_workflow(
+     *,
+     workflow_id: int,
+     dataset_id: int,
+     job_id: int,
+     worker_init: Optional[str] = None,
+     slurm_user: Optional[str] = None,
+     user_cache_dir: Optional[str] = None,
+ ) -> None:
+     """
+     Prepares a workflow and applies it to a dataset
+
+     This function wraps the process_workflow one, which is different for each
+     backend (e.g. local or slurm backend).
+
+     Args:
+         workflow_id:
+             ID of the workflow being applied
+         dataset_id:
+             Dataset ID
+         job_id:
+             Id of the job record which stores the state for the current
+             workflow application.
+         worker_init:
+             Custom executor parameters that get parsed before the execution of
+             each task.
+         user_cache_dir:
+             Cache directory (namely a path where the user can write); for the
+             slurm backend, this is used as a base directory for
+             `job.working_dir_user`.
+         slurm_user:
+             The username to impersonate for the workflow execution, for the
+             slurm backend.
+     """
+
+     # Declare runner backend and set `process_workflow` function
+     settings = Inject(get_settings)
+     FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
+     if FRACTAL_RUNNER_BACKEND == "local":
+         process_workflow = local_process_workflow
+     elif FRACTAL_RUNNER_BACKEND == "slurm":
+         process_workflow = slurm_process_workflow
+     else:
+         raise RuntimeError(f"Invalid runner backend {FRACTAL_RUNNER_BACKEND=}")
+
+     with next(DB.get_sync_db()) as db_sync:
+
+         job: JobV2 = db_sync.get(JobV2, job_id)
+         if not job:
+             raise ValueError(f"Cannot fetch job {job_id} from database")
+
+         dataset: DatasetV2 = db_sync.get(DatasetV2, dataset_id)
+         workflow: WorkflowV2 = db_sync.get(WorkflowV2, workflow_id)
+         if not (dataset and workflow):
+             log_msg = ""
+             if not dataset:
+                 log_msg += f"Cannot fetch dataset {dataset_id} from database\n"
+             if not workflow:
+                 log_msg += (
+                     f"Cannot fetch workflow {workflow_id} from database\n"
+                 )
+             job.status = JobStatusTypeV2.FAILED
+             job.end_timestamp = get_timestamp()
+             job.log = log_msg
+             db_sync.merge(job)
+             db_sync.commit()
+             db_sync.close()
+             return
+
+         # Define and create server-side working folder
+         project_id = workflow.project_id
+         timestamp_string = get_timestamp().strftime("%Y%m%d_%H%M%S")
+         WORKFLOW_DIR = (
+             settings.FRACTAL_RUNNER_WORKING_BASE_DIR
+             / (
+                 f"proj_{project_id:07d}_wf_{workflow_id:07d}_job_{job_id:07d}"
+                 f"_{timestamp_string}"
+             )
+         ).resolve()
+
+         if WORKFLOW_DIR.exists():
+             raise RuntimeError(f"Workflow dir {WORKFLOW_DIR} already exists.")
+
+         # Create WORKFLOW_DIR with 755 permissions
+         original_umask = os.umask(0)
+         WORKFLOW_DIR.mkdir(parents=True, mode=0o755)
+         os.umask(original_umask)
+
+         # Define and create user-side working folder, if needed
+         if FRACTAL_RUNNER_BACKEND == "local":
+             WORKFLOW_DIR_USER = WORKFLOW_DIR
+         elif FRACTAL_RUNNER_BACKEND == "slurm":
+
+             from ..executors.slurm._subprocess_run_as_user import (
+                 _mkdir_as_user,
+             )
+
+             WORKFLOW_DIR_USER = (
+                 Path(user_cache_dir) / f"{WORKFLOW_DIR.name}"
+             ).resolve()
+             _mkdir_as_user(folder=str(WORKFLOW_DIR_USER), user=slurm_user)
+         else:
+             raise ValueError(f"{FRACTAL_RUNNER_BACKEND=} not supported")
+
+         # Update db
+         job.working_dir = WORKFLOW_DIR.as_posix()
+         job.working_dir_user = WORKFLOW_DIR_USER.as_posix()
+         db_sync.merge(job)
+         db_sync.commit()
+
+         # After Session.commit() is called, either explicitly or when using a
+         # context manager, all objects associated with the Session are expired.
+         # https://docs.sqlalchemy.org/en/14/orm/
+         # session_basics.html#opening-and-closing-a-session
+         # https://docs.sqlalchemy.org/en/14/orm/
+         # session_state_management.html#refreshing-expiring
+
+         # See issue #928:
+         # https://github.com/fractal-analytics-platform/
+         # fractal-server/issues/928
+
+         db_sync.refresh(dataset)
+         db_sync.refresh(workflow)
+
+         # Write logs
+         logger_name = f"WF{workflow_id}_job{job_id}"
+         log_file_path = WORKFLOW_DIR / WORKFLOW_LOG_FILENAME
+         logger = set_logger(
+             logger_name=logger_name,
+             log_file_path=log_file_path,
+         )
+         logger.info(
+             f'Start execution of workflow "{workflow.name}"; '
+             f"more logs at {str(log_file_path)}"
+         )
+         logger.debug(f"fractal_server.__VERSION__: {__VERSION__}")
+         logger.debug(f"FRACTAL_RUNNER_BACKEND: {FRACTAL_RUNNER_BACKEND}")
+         logger.debug(f"slurm_user: {slurm_user}")
+         logger.debug(f"slurm_account: {job.slurm_account}")
+         logger.debug(f"worker_init: {worker_init}")
+         logger.debug(f"job.id: {job.id}")
+         logger.debug(f"job.working_dir: {job.working_dir}")
+         logger.debug(f"job.working_dir_user: {job.working_dir_user}")
+         logger.debug(f"job.first_task_index: {job.first_task_index}")
+         logger.debug(f"job.last_task_index: {job.last_task_index}")
+         logger.debug(f'START workflow "{workflow.name}"')
+
+     try:
+         # "The Session.close() method does not prevent the Session from being
+         # used again. The Session itself does not actually have a distinct
+         # “closed” state; it merely means the Session will release all database
+         # connections and ORM objects."
+         # (https://docs.sqlalchemy.org/en/20/orm/session_api.html#sqlalchemy.orm.Session.close).
+         #
+         # We close the session before the (possibly long) process_workflow
+         # call, to make sure all DB connections are released. The reason why we
+         # are not using a context manager within the try block is that we also
+         # need access to db_sync in the except branches.
+         db_sync = next(DB.get_sync_db())
+         db_sync.close()
+
+         new_dataset_attributes = await process_workflow(
+             workflow=workflow,
+             dataset=dataset,
+             slurm_user=slurm_user,
+             slurm_account=job.slurm_account,
+             user_cache_dir=user_cache_dir,
+             workflow_dir=WORKFLOW_DIR,
+             workflow_dir_user=WORKFLOW_DIR_USER,
+             logger_name=logger_name,
+             worker_init=worker_init,
+             first_task_index=job.first_task_index,
+             last_task_index=job.last_task_index,
+         )
+
+         logger.info(
+             f'End execution of workflow "{workflow.name}"; '
+             f"more logs at {str(log_file_path)}"
+         )
+         logger.debug(f'END workflow "{workflow.name}"')
+
+         # Update dataset attributes, in case of successful execution
+         dataset.history.extend(new_dataset_attributes["history"])
+         dataset.filters = new_dataset_attributes["filters"]
+         dataset.images = new_dataset_attributes["images"]
+         for attribute_name in ["filters", "history", "images"]:
+             flag_modified(dataset, attribute_name)
+         db_sync.merge(dataset)
+
+         # Update job DB entry
+         job.status = JobStatusTypeV2.DONE
+         job.end_timestamp = get_timestamp()
+         with log_file_path.open("r") as f:
+             logs = f.read()
+         job.log = logs
+         db_sync.merge(job)
+         db_sync.commit()
+
+     except TaskExecutionError as e:
+
+         logger.debug(f'FAILED workflow "{workflow.name}", TaskExecutionError.')
+         logger.info(f'Workflow "{workflow.name}" failed (TaskExecutionError).')
+
+         # Read dataset attributes produced by the last successful task, and
+         # update the DB dataset accordingly
+         failed_wftask = db_sync.get(WorkflowTaskV2, e.workflow_task_id)
+         dataset.history = assemble_history_failed_job(
+             job,
+             dataset,
+             workflow,
+             logger,
+             failed_wftask=failed_wftask,
+         )
+         latest_filters = assemble_filters_failed_job(job)
+         if latest_filters is not None:
+             dataset.filters = latest_filters
+         latest_images = assemble_images_failed_job(job)
+         if latest_images is not None:
+             dataset.images = latest_images
+         db_sync.merge(dataset)
+
+         job.status = JobStatusTypeV2.FAILED
+         job.end_timestamp = get_timestamp()
+
+         exception_args_string = "\n".join(e.args)
+         job.log = (
+             f"TASK ERROR: "
+             f"Task name: {e.task_name}, "
+             f"position in Workflow: {e.workflow_task_order}\n"
+             f"TRACEBACK:\n{exception_args_string}"
+         )
+         db_sync.merge(job)
+         db_sync.commit()
+
+     except JobExecutionError as e:
+
+         logger.debug(f'FAILED workflow "{workflow.name}", JobExecutionError.')
+         logger.info(f'Workflow "{workflow.name}" failed (JobExecutionError).')
+
+         # Read dataset attributes produced by the last successful task, and
+         # update the DB dataset accordingly
+         dataset.history = assemble_history_failed_job(
+             job,
+             dataset,
+             workflow,
+             logger,
+         )
+         latest_filters = assemble_filters_failed_job(job)
+         if latest_filters is not None:
+             dataset.filters = latest_filters
+         latest_images = assemble_images_failed_job(job)
+         if latest_images is not None:
+             dataset.images = latest_images
+         db_sync.merge(dataset)
+
+         job.status = JobStatusTypeV2.FAILED
+         job.end_timestamp = get_timestamp()
+         error = e.assemble_error()
+         job.log = f"JOB ERROR in Fractal job {job.id}:\nTRACEBACK:\n{error}"
+         db_sync.merge(job)
+         db_sync.commit()
+
+     except Exception:
+
+         logger.debug(f'FAILED workflow "{workflow.name}", unknown error.')
+         logger.info(f'Workflow "{workflow.name}" failed (unknown error).')
+
+         current_traceback = traceback.format_exc()
+
+         # Read dataset attributes produced by the last successful task, and
+         # update the DB dataset accordingly
+         dataset.history = assemble_history_failed_job(
+             job,
+             dataset,
+             workflow,
+             logger,
+         )
+         latest_filters = assemble_filters_failed_job(job)
+         if latest_filters is not None:
+             dataset.filters = latest_filters
+         latest_images = assemble_images_failed_job(job)
+         if latest_images is not None:
+             dataset.images = latest_images
+         db_sync.merge(dataset)
+
+         job.status = JobStatusTypeV2.FAILED
+         job.end_timestamp = get_timestamp()
+         job.log = (
+             f"UNKNOWN ERROR in Fractal job {job.id}\n"
+             f"TRACEBACK:\n{current_traceback}"
+         )
+         db_sync.merge(job)
+         db_sync.commit()
+     finally:
+         close_logger(logger)
+         db_sync.close()
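Note (not part of the diff): the backend selection at the top of `submit_workflow` follows a plain registry/dispatch pattern keyed by `FRACTAL_RUNNER_BACKEND`. A reduced sketch of that pattern only, using stand-in async callables rather than the real backends:

    import asyncio
    from typing import Any, Awaitable, Callable


    async def local_process_workflow(**kwargs: Any) -> dict:
        # Stand-in for the local backend's process_workflow.
        return {"history": [], "filters": {}, "images": []}


    async def slurm_process_workflow(**kwargs: Any) -> dict:
        # Stand-in for the SLURM backend's process_workflow.
        return {"history": [], "filters": {}, "images": []}


    _backends: dict[str, Callable[..., Awaitable[dict]]] = {
        "local": local_process_workflow,
        "slurm": slurm_process_workflow,
    }


    def select_backend(name: str) -> Callable[..., Awaitable[dict]]:
        # Unknown backend names fail early, mirroring the RuntimeError branch above.
        try:
            return _backends[name]
        except KeyError:
            raise RuntimeError(f"Invalid runner backend {name!r}")


    new_dataset_attributes = asyncio.run(select_backend("local")())
    print(new_dataset_attributes)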