fractal-server 1.4.9__py3-none-any.whl → 2.0.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. fractal_server/__init__.py +1 -1
  2. fractal_server/app/models/__init__.py +4 -7
  3. fractal_server/app/models/linkuserproject.py +9 -0
  4. fractal_server/app/models/security.py +6 -0
  5. fractal_server/app/models/state.py +1 -1
  6. fractal_server/app/models/v1/__init__.py +10 -0
  7. fractal_server/app/models/{dataset.py → v1/dataset.py} +5 -5
  8. fractal_server/app/models/{job.py → v1/job.py} +5 -5
  9. fractal_server/app/models/{project.py → v1/project.py} +5 -5
  10. fractal_server/app/models/{task.py → v1/task.py} +7 -2
  11. fractal_server/app/models/{workflow.py → v1/workflow.py} +5 -5
  12. fractal_server/app/models/v2/__init__.py +20 -0
  13. fractal_server/app/models/v2/dataset.py +55 -0
  14. fractal_server/app/models/v2/job.py +51 -0
  15. fractal_server/app/models/v2/project.py +31 -0
  16. fractal_server/app/models/v2/task.py +93 -0
  17. fractal_server/app/models/v2/workflow.py +43 -0
  18. fractal_server/app/models/v2/workflowtask.py +90 -0
  19. fractal_server/app/routes/{admin.py → admin/v1.py} +42 -42
  20. fractal_server/app/routes/admin/v2.py +275 -0
  21. fractal_server/app/routes/api/v1/__init__.py +7 -7
  22. fractal_server/app/routes/api/v1/_aux_functions.py +2 -2
  23. fractal_server/app/routes/api/v1/dataset.py +44 -37
  24. fractal_server/app/routes/api/v1/job.py +12 -12
  25. fractal_server/app/routes/api/v1/project.py +23 -21
  26. fractal_server/app/routes/api/v1/task.py +24 -14
  27. fractal_server/app/routes/api/v1/task_collection.py +16 -14
  28. fractal_server/app/routes/api/v1/workflow.py +24 -24
  29. fractal_server/app/routes/api/v1/workflowtask.py +10 -10
  30. fractal_server/app/routes/api/v2/__init__.py +28 -0
  31. fractal_server/app/routes/api/v2/_aux_functions.py +497 -0
  32. fractal_server/app/routes/api/v2/apply.py +220 -0
  33. fractal_server/app/routes/api/v2/dataset.py +310 -0
  34. fractal_server/app/routes/api/v2/images.py +212 -0
  35. fractal_server/app/routes/api/v2/job.py +200 -0
  36. fractal_server/app/routes/api/v2/project.py +205 -0
  37. fractal_server/app/routes/api/v2/task.py +222 -0
  38. fractal_server/app/routes/api/v2/task_collection.py +229 -0
  39. fractal_server/app/routes/api/v2/workflow.py +398 -0
  40. fractal_server/app/routes/api/v2/workflowtask.py +269 -0
  41. fractal_server/app/routes/aux/_job.py +1 -1
  42. fractal_server/app/runner/async_wrap.py +27 -0
  43. fractal_server/app/runner/exceptions.py +129 -0
  44. fractal_server/app/runner/executors/local/__init__.py +3 -0
  45. fractal_server/app/runner/{_local → executors/local}/executor.py +2 -2
  46. fractal_server/app/runner/executors/slurm/__init__.py +3 -0
  47. fractal_server/app/runner/{_slurm → executors/slurm}/_batching.py +1 -1
  48. fractal_server/app/runner/executors/slurm/_check_jobs_status.py +72 -0
  49. fractal_server/app/runner/{_slurm → executors/slurm}/_executor_wait_thread.py +3 -4
  50. fractal_server/app/runner/{_slurm → executors/slurm}/_slurm_config.py +3 -152
  51. fractal_server/app/runner/{_slurm → executors/slurm}/_subprocess_run_as_user.py +1 -1
  52. fractal_server/app/runner/{_slurm → executors/slurm}/executor.py +9 -9
  53. fractal_server/app/runner/filenames.py +6 -0
  54. fractal_server/app/runner/set_start_and_last_task_index.py +39 -0
  55. fractal_server/app/runner/task_files.py +105 -0
  56. fractal_server/app/runner/{__init__.py → v1/__init__.py} +36 -49
  57. fractal_server/app/runner/{_common.py → v1/_common.py} +13 -120
  58. fractal_server/app/runner/{_local → v1/_local}/__init__.py +6 -6
  59. fractal_server/app/runner/{_local → v1/_local}/_local_config.py +6 -7
  60. fractal_server/app/runner/{_local → v1/_local}/_submit_setup.py +1 -5
  61. fractal_server/app/runner/v1/_slurm/__init__.py +310 -0
  62. fractal_server/app/runner/{_slurm → v1/_slurm}/_submit_setup.py +3 -9
  63. fractal_server/app/runner/v1/_slurm/get_slurm_config.py +163 -0
  64. fractal_server/app/runner/v1/common.py +117 -0
  65. fractal_server/app/runner/{handle_failed_job.py → v1/handle_failed_job.py} +8 -8
  66. fractal_server/app/runner/v2/__init__.py +337 -0
  67. fractal_server/app/runner/v2/_local/__init__.py +169 -0
  68. fractal_server/app/runner/v2/_local/_local_config.py +118 -0
  69. fractal_server/app/runner/v2/_local/_submit_setup.py +52 -0
  70. fractal_server/app/runner/v2/_slurm/__init__.py +157 -0
  71. fractal_server/app/runner/v2/_slurm/_submit_setup.py +83 -0
  72. fractal_server/app/runner/v2/_slurm/get_slurm_config.py +179 -0
  73. fractal_server/app/runner/v2/components.py +5 -0
  74. fractal_server/app/runner/v2/deduplicate_list.py +24 -0
  75. fractal_server/app/runner/v2/handle_failed_job.py +156 -0
  76. fractal_server/app/runner/v2/merge_outputs.py +41 -0
  77. fractal_server/app/runner/v2/runner.py +264 -0
  78. fractal_server/app/runner/v2/runner_functions.py +339 -0
  79. fractal_server/app/runner/v2/runner_functions_low_level.py +134 -0
  80. fractal_server/app/runner/v2/task_interface.py +43 -0
  81. fractal_server/app/runner/v2/v1_compat.py +21 -0
  82. fractal_server/app/schemas/__init__.py +4 -42
  83. fractal_server/app/schemas/v1/__init__.py +42 -0
  84. fractal_server/app/schemas/{applyworkflow.py → v1/applyworkflow.py} +18 -18
  85. fractal_server/app/schemas/{dataset.py → v1/dataset.py} +30 -30
  86. fractal_server/app/schemas/{dumps.py → v1/dumps.py} +8 -8
  87. fractal_server/app/schemas/{manifest.py → v1/manifest.py} +5 -5
  88. fractal_server/app/schemas/{project.py → v1/project.py} +9 -9
  89. fractal_server/app/schemas/{task.py → v1/task.py} +12 -12
  90. fractal_server/app/schemas/{task_collection.py → v1/task_collection.py} +7 -7
  91. fractal_server/app/schemas/{workflow.py → v1/workflow.py} +38 -38
  92. fractal_server/app/schemas/v2/__init__.py +34 -0
  93. fractal_server/app/schemas/v2/dataset.py +88 -0
  94. fractal_server/app/schemas/v2/dumps.py +87 -0
  95. fractal_server/app/schemas/v2/job.py +113 -0
  96. fractal_server/app/schemas/v2/manifest.py +109 -0
  97. fractal_server/app/schemas/v2/project.py +36 -0
  98. fractal_server/app/schemas/v2/task.py +121 -0
  99. fractal_server/app/schemas/v2/task_collection.py +105 -0
  100. fractal_server/app/schemas/v2/workflow.py +78 -0
  101. fractal_server/app/schemas/v2/workflowtask.py +118 -0
  102. fractal_server/config.py +5 -10
  103. fractal_server/images/__init__.py +50 -0
  104. fractal_server/images/tools.py +86 -0
  105. fractal_server/main.py +11 -3
  106. fractal_server/migrations/versions/4b35c5cefbe3_tmp_is_v2_compatible.py +39 -0
  107. fractal_server/migrations/versions/56af171b0159_v2.py +217 -0
  108. fractal_server/migrations/versions/876f28db9d4e_tmp_split_task_and_wftask_meta.py +68 -0
  109. fractal_server/migrations/versions/974c802f0dd0_tmp_workflowtaskv2_type_in_db.py +37 -0
  110. fractal_server/migrations/versions/9cd305cd6023_tmp_workflowtaskv2.py +40 -0
  111. fractal_server/migrations/versions/a6231ed6273c_tmp_args_schemas_in_taskv2.py +42 -0
  112. fractal_server/migrations/versions/b9e9eed9d442_tmp_taskv2_type.py +37 -0
  113. fractal_server/migrations/versions/e3e639454d4b_tmp_make_task_meta_non_optional.py +50 -0
  114. fractal_server/tasks/__init__.py +0 -5
  115. fractal_server/tasks/endpoint_operations.py +13 -19
  116. fractal_server/tasks/utils.py +35 -0
  117. fractal_server/tasks/{_TaskCollectPip.py → v1/_TaskCollectPip.py} +3 -3
  118. fractal_server/tasks/{background_operations.py → v1/background_operations.py} +18 -50
  119. fractal_server/tasks/v1/get_collection_data.py +14 -0
  120. fractal_server/tasks/v2/_TaskCollectPip.py +103 -0
  121. fractal_server/tasks/v2/background_operations.py +382 -0
  122. fractal_server/tasks/v2/get_collection_data.py +14 -0
  123. {fractal_server-1.4.9.dist-info → fractal_server-2.0.0a0.dist-info}/METADATA +3 -4
  124. fractal_server-2.0.0a0.dist-info/RECORD +166 -0
  125. fractal_server/app/runner/_slurm/.gitignore +0 -2
  126. fractal_server/app/runner/_slurm/__init__.py +0 -150
  127. fractal_server/app/runner/common.py +0 -311
  128. fractal_server-1.4.9.dist-info/RECORD +0 -97
  129. /fractal_server/app/runner/{_slurm → executors/slurm}/remote.py +0 -0
  130. {fractal_server-1.4.9.dist-info → fractal_server-2.0.0a0.dist-info}/LICENSE +0 -0
  131. {fractal_server-1.4.9.dist-info → fractal_server-2.0.0a0.dist-info}/WHEEL +0 -0
  132. {fractal_server-1.4.9.dist-info → fractal_server-2.0.0a0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,179 @@
+ from pathlib import Path
+ from typing import Literal
+ from typing import Optional
+
+ from fractal_server.app.models.v2 import WorkflowTaskV2
+ from fractal_server.app.runner.executors.slurm._slurm_config import (
+     _parse_mem_value,
+ )
+ from fractal_server.app.runner.executors.slurm._slurm_config import (
+     load_slurm_config_file,
+ )
+ from fractal_server.app.runner.executors.slurm._slurm_config import logger
+ from fractal_server.app.runner.executors.slurm._slurm_config import SlurmConfig
+ from fractal_server.app.runner.executors.slurm._slurm_config import (
+     SlurmConfigError,
+ )
+
+
+ def get_slurm_config(
+     wftask: WorkflowTaskV2,
+     workflow_dir: Path,
+     workflow_dir_user: Path,
+     which_type: Literal["non_parallel", "parallel"],
+     config_path: Optional[Path] = None,
+ ) -> SlurmConfig:
+     """
+     Prepare a `SlurmConfig` configuration object.
+
+     The argument `which_type` determines whether we use `wftask.meta_parallel`
+     or `wftask.meta_non_parallel`. In the following description, let us assume
+     that `which_type="parallel"`.
+
+     The sources for `SlurmConfig` attributes, in increasing priority order, are
+
+     1. The general content of the Fractal SLURM configuration file.
+     2. The GPU-specific content of the Fractal SLURM configuration file, if
+        appropriate.
+     3. Properties in `wftask.meta_parallel` (which typically include those in
+        `wftask.task.meta_parallel`). Note that `wftask.meta_parallel` may be
+        `None`.
+
+     Arguments:
+         wftask:
+             WorkflowTask for which the SLURM configuration is to be
+             prepared.
+         workflow_dir:
+             Server-owned directory to store all task-execution-related
+             files (inputs, outputs, errors, and all meta files related to the
+             job execution). Note: users cannot write directly to this folder.
+         workflow_dir_user:
+             User-side directory with the same scope as `workflow_dir`, and
+             where a user can write.
+         which_type:
+             Determines whether to use `meta_parallel` or `meta_non_parallel`.
+         config_path:
+             Path of a Fractal SLURM configuration file; if `None`, use the
+             `FRACTAL_SLURM_CONFIG_FILE` variable from settings.
+
+     Returns:
+         slurm_config:
+             The `SlurmConfig` object.
+     """
+
+     if which_type == "non_parallel":
+         wftask_meta = wftask.meta_non_parallel
+     elif which_type == "parallel":
+         wftask_meta = wftask.meta_parallel
+     else:
+         raise ValueError(
+             f"get_slurm_config received invalid argument {which_type=}."
+         )
+
+     logger.debug(
+         f"[get_slurm_config] WorkflowTask meta attribute: {wftask_meta=}"
+     )
+
+     # Incorporate slurm_env.default_slurm_config
+     slurm_env = load_slurm_config_file(config_path=config_path)
+     slurm_dict = slurm_env.default_slurm_config.dict(
+         exclude_unset=True, exclude={"mem"}
+     )
+     if slurm_env.default_slurm_config.mem:
+         slurm_dict["mem_per_task_MB"] = slurm_env.default_slurm_config.mem
+
+     # Incorporate slurm_env.batching_config
+     for key, value in slurm_env.batching_config.dict().items():
+         slurm_dict[key] = value
+
+     # Incorporate slurm_env.user_local_exports
+     slurm_dict["user_local_exports"] = slurm_env.user_local_exports
+
+     logger.debug(
+         "[get_slurm_config] Fractal SLURM configuration file: "
+         f"{slurm_env.dict()=}"
+     )
+
+     # GPU-related options
+     # Notes about priority:
+     # 1. This block of definitions takes priority over other definitions from
+     #    slurm_env which are not under the `needs_gpu` subgroup
+     # 2. This block of definitions has lower priority than whatever comes next
+     #    (i.e. from WorkflowTask.meta).
+     if wftask_meta is not None:
+         needs_gpu = wftask_meta.get("needs_gpu", False)
+     else:
+         needs_gpu = False
+     logger.debug(f"[get_slurm_config] {needs_gpu=}")
+     if needs_gpu:
+         for key, value in slurm_env.gpu_slurm_config.dict(
+             exclude_unset=True, exclude={"mem"}
+         ).items():
+             slurm_dict[key] = value
+         if slurm_env.gpu_slurm_config.mem:
+             slurm_dict["mem_per_task_MB"] = slurm_env.gpu_slurm_config.mem
+
+     # Number of CPUs per task, for multithreading
+     if wftask_meta is not None and "cpus_per_task" in wftask_meta:
+         cpus_per_task = int(wftask_meta["cpus_per_task"])
+         slurm_dict["cpus_per_task"] = cpus_per_task
+
+     # Required memory per task, in MB
+     if wftask_meta is not None and "mem" in wftask_meta:
+         raw_mem = wftask_meta["mem"]
+         mem_per_task_MB = _parse_mem_value(raw_mem)
+         slurm_dict["mem_per_task_MB"] = mem_per_task_MB
+
+     # Job name
+     job_name = wftask.task.name.replace(" ", "_")
+     slurm_dict["job_name"] = job_name
+
+     # Optional SLURM arguments and extra lines
+     if wftask_meta is not None:
+         account = wftask_meta.get("account", None)
+         if account is not None:
+             error_msg = (
+                 f"Invalid {account=} property in WorkflowTask `meta` "
+                 "attribute.\n"
+                 "SLURM account must be set in the request body of the "
+                 "apply-workflow endpoint, or by modifying the user properties."
+             )
+             logger.error(error_msg)
+             raise SlurmConfigError(error_msg)
+         for key in ["time", "gres", "constraint"]:
+             value = wftask_meta.get(key, None)
+             if value:
+                 slurm_dict[key] = value
+     if wftask_meta is not None:
+         extra_lines = wftask_meta.get("extra_lines", [])
+     else:
+         extra_lines = []
+     extra_lines = slurm_dict.get("extra_lines", []) + extra_lines
+     if len(set(extra_lines)) != len(extra_lines):
+         logger.debug(
+             "[get_slurm_config] Removing repeated elements "
+             f"from {extra_lines=}."
+         )
+         extra_lines = list(set(extra_lines))
+     slurm_dict["extra_lines"] = extra_lines
+
+     # Job-batching parameters (if None, they will be determined heuristically)
+     if wftask_meta is not None:
+         tasks_per_job = wftask_meta.get("tasks_per_job", None)
+         parallel_tasks_per_job = wftask_meta.get(
+             "parallel_tasks_per_job", None
+         )
+     else:
+         tasks_per_job = None
+         parallel_tasks_per_job = None
+     slurm_dict["tasks_per_job"] = tasks_per_job
+     slurm_dict["parallel_tasks_per_job"] = parallel_tasks_per_job
+
+     # Put everything together
+     logger.debug(
+         "[get_slurm_config] Now create a SlurmConfig object based "
+         f"on {slurm_dict=}"
+     )
+     slurm_config = SlurmConfig(**slurm_dict)
+
+     return slurm_config
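
For readers skimming the diff, the three-level priority order documented above can be summarized with a minimal sketch; plain dicts stand in for the real `SlurmConfig` machinery, and all values are made up:

# Level 1: general config file; level 2: GPU subgroup; level 3: wftask meta
default_config = {"partition": "main", "mem_per_task_MB": 4000}
gpu_config = {"partition": "gpu", "gres": "gpu:1"}
wftask_meta = {"needs_gpu": True, "mem": "16G"}

merged = dict(default_config)
if wftask_meta.get("needs_gpu", False):
    merged.update(gpu_config)  # GPU subgroup overrides the general defaults
if "mem" in wftask_meta:
    # in the real code, _parse_mem_value converts strings like "16G" to MB
    merged["mem_per_task_MB"] = 16000
# merged == {"partition": "gpu", "mem_per_task_MB": 16000, "gres": "gpu:1"}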
@@ -0,0 +1,5 @@
+ def _index_to_component(ind: int) -> str:
+     return f"{ind:07d}"
+
+
+ _COMPONENT_KEY_ = "__FRACTAL_PARALLEL_COMPONENT__"
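
A quick illustration (hypothetical calls, not part of the diff): zero-padding to seven digits makes component strings sort lexicographically in the same order as their integer indices.

assert _index_to_component(3) == "0000003"
assert _index_to_component(12) < _index_to_component(100)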
@@ -0,0 +1,24 @@
+ from typing import TypeVar
+
+ from pydantic.main import ModelMetaclass
+
+ from ....images import SingleImage
+ from .task_interface import InitArgsModel
+
+ T = TypeVar("T", SingleImage, InitArgsModel)
+
+
+ def deduplicate_list(
+     this_list: list[T], PydanticModel: ModelMetaclass
+ ) -> list[T]:
+     """
+     Custom replacement for `set(this_list)`, for cases where the items are
+     Pydantic-model instances and therefore non-hashable (e.g. `SingleImage`
+     or `InitArgsModel`).
+     """
+     this_list_dict = [this_item.dict() for this_item in this_list]
+     new_list_dict = []
+     for this_dict in this_list_dict:
+         if this_dict not in new_list_dict:
+             new_list_dict.append(this_dict)
+     new_list = [PydanticModel(**this_dict) for this_dict in new_list_dict]
+     return new_list
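
A usage sketch (paths are made up, and it assumes `path` is the only required `SingleImage` field):

images = [
    SingleImage(path="/zarr/plate.zarr/A/01/0"),
    SingleImage(path="/zarr/plate.zarr/A/01/0"),  # duplicate of the first
    SingleImage(path="/zarr/plate.zarr/A/02/0"),
]
unique = deduplicate_list(images, PydanticModel=SingleImage)
assert len(unique) == 2  # first-occurrence order is preserved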
@@ -0,0 +1,156 @@
+ # Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
+ # University of Zurich
+ #
+ # Original authors:
+ # Tommaso Comparin <tommaso.comparin@exact-lab.it>
+ # Marco Franzon <marco.franzon@exact-lab.it>
+ #
+ # This file is part of Fractal and was originally developed by eXact lab S.r.l.
+ # <exact-lab.it> under contract with Liberali Lab from the Friedrich Miescher
+ # Institute for Biomedical Research and Pelkmans Lab from the University of
+ # Zurich.
+ """
+ Helper functions to handle Dataset history.
+ """
+ import json
+ import logging
+ from pathlib import Path
+ from typing import Any
+ from typing import Optional
+
+ from ...models.v2 import DatasetV2
+ from ...models.v2 import JobV2
+ from ...models.v2 import WorkflowTaskV2
+ from ...models.v2 import WorkflowV2
+ from ...schemas.v2 import WorkflowTaskStatusTypeV2
+ from ..filenames import FILTERS_FILENAME
+ from ..filenames import HISTORY_FILENAME
+ from ..filenames import IMAGES_FILENAME
+
+
+ def assemble_history_failed_job(
+     job: JobV2,
+     dataset: DatasetV2,
+     workflow: WorkflowV2,
+     logger: logging.Logger,
+     failed_wftask: Optional[WorkflowTaskV2] = None,
+ ) -> list[dict[str, Any]]:
+     """
+     Assemble `history` after a workflow-execution job fails.
+
+     Args:
+         job:
+             The failed `JobV2` object.
+         dataset:
+             The `dataset` associated to `job`.
+         workflow:
+             The `workflow` associated to `job`.
+         logger: A logger instance.
+         failed_wftask:
+             If set, append it to `history` during step 3; if `None`, infer
+             it by comparing the job task list and the one in
+             `HISTORY_FILENAME`.
+
+     Returns:
+         The new value of `history`, to be set on `dataset`.
+     """
+
+     # The final value of the history attribute should include up to three
+     # parts, coming from: the database, the temporary file, the failed-task
+     # information.
+
+     # Part 1: Read existing history from DB
+     new_history = dataset.history
+
+     # Part 2: Extend history based on temporary-file contents
+     tmp_history_file = Path(job.working_dir) / HISTORY_FILENAME
+     try:
+         with tmp_history_file.open("r") as f:
+             tmp_file_history = json.load(f)
+             new_history.extend(tmp_file_history)
+     except FileNotFoundError:
+         tmp_file_history = []
+
+     # Part 3/A: Identify failed task, if needed
+     if failed_wftask is None:
+         job_wftasks = workflow.task_list[
+             job.first_task_index : (job.last_task_index + 1)  # noqa
+         ]
+         tmp_file_wftasks = [
+             history_item["workflowtask"] for history_item in tmp_file_history
+         ]
+         if len(job_wftasks) <= len(tmp_file_wftasks):
+             n_tasks_job = len(job_wftasks)
+             n_tasks_tmp = len(tmp_file_wftasks)
+             logger.error(
+                 "Cannot identify the failed task based on job task list "
+                 f"(length {n_tasks_job}) and temporary-file task list "
+                 f"(length {n_tasks_tmp})."
+             )
+             logger.error("Failed task not appended to history.")
+         else:
+             failed_wftask = job_wftasks[len(tmp_file_wftasks)]
+
+     # Part 3/B: Append failed task to history
+     if failed_wftask is not None:
+         failed_wftask_dump = failed_wftask.model_dump(exclude={"task"})
+         failed_wftask_dump["task"] = failed_wftask.task.model_dump()
+         new_history_item = dict(
+             workflowtask=failed_wftask_dump,
+             status=WorkflowTaskStatusTypeV2.FAILED,
+             parallelization=dict(),  # FIXME: re-include parallelization
+         )
+         new_history.append(new_history_item)
+
+     return new_history
+
+
+ def assemble_images_failed_job(job: JobV2) -> Optional[dict[str, Any]]:
+     """
+     Assemble `DatasetV2.images` for a failed workflow-execution.
+
+     Assemble the new value of `images` based on the last successful task,
+     i.e. based on the content of the temporary `IMAGES_FILENAME` file. If the
+     file is missing, return `None`.
+
+     Arguments:
+         job:
+             The failed `JobV2` object.
+
+     Returns:
+         The new value of `dataset.images`, or `None` if `IMAGES_FILENAME`
+         is missing.
+     """
+     tmp_file = Path(job.working_dir) / IMAGES_FILENAME
+     try:
+         with tmp_file.open("r") as f:
+             new_images = json.load(f)
+         return new_images
+     except FileNotFoundError:
+         return None
+
+
+ def assemble_filters_failed_job(job: JobV2) -> Optional[dict[str, Any]]:
+     """
+     Assemble `DatasetV2.filters` for a failed workflow-execution.
+
+     Assemble the new value of `filters` based on the last successful task,
+     i.e. based on the content of the temporary `FILTERS_FILENAME` file. If
+     the file is missing, return `None`.
+
+     Arguments:
+         job:
+             The failed `JobV2` object.
+
+     Returns:
+         The new value of `dataset.filters`, or `None` if `FILTERS_FILENAME`
+         is missing.
+     """
+     tmp_file = Path(job.working_dir) / FILTERS_FILENAME
+     try:
+         with tmp_file.open("r") as f:
+             new_filters = json.load(f)
+         return new_filters
+     except FileNotFoundError:
+         return None
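
How these helpers fit together on failure, as an illustrative sketch only: the real caller lives in the v2 runner's job-execution wrapper, and `run_workflow`, `job`, `dataset`, and `workflow` are placeholder names here.

import logging

try:
    run_workflow(job)  # placeholder for the actual execution entry point
except Exception:
    dataset.history = assemble_history_failed_job(
        job, dataset, workflow, logger=logging.getLogger()
    )
    # The images/filters helpers return None when their temporary file is
    # missing; in that case the existing DB value is left untouched
    new_images = assemble_images_failed_job(job)
    if new_images is not None:
        dataset.images = new_images
    new_filters = assemble_filters_failed_job(job)
    if new_filters is not None:
        dataset.filters = new_filters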
@@ -0,0 +1,41 @@
+ from copy import copy
+
+ from fractal_server.app.runner.v2.deduplicate_list import deduplicate_list
+ from fractal_server.app.runner.v2.task_interface import TaskOutput
+ from fractal_server.images import SingleImage
+
+
+ def merge_outputs(task_outputs: list[TaskOutput]) -> TaskOutput:
+
+     final_image_list_updates = []
+     final_image_list_removals = []
+     last_new_filters = None
+
+     for ind, task_output in enumerate(task_outputs):
+
+         final_image_list_updates.extend(task_output.image_list_updates)
+         final_image_list_removals.extend(task_output.image_list_removals)
+
+         # Check that all filters are the same
+         current_new_filters = task_output.filters
+         if ind == 0:
+             last_new_filters = copy(current_new_filters)
+         if current_new_filters != last_new_filters:
+             raise ValueError(f"{current_new_filters=} but {last_new_filters=}")
+         last_new_filters = copy(current_new_filters)
+
+     final_image_list_updates = deduplicate_list(
+         final_image_list_updates, PydanticModel=SingleImage
+     )
+
+     additional_args = {}
+     if last_new_filters is not None:
+         additional_args["filters"] = last_new_filters
+
+     final_output = TaskOutput(
+         image_list_updates=final_image_list_updates,
+         image_list_removals=final_image_list_removals,
+         **additional_args,
+     )
+
+     return final_output
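
A minimal sketch of the merge semantics (it assumes `TaskOutput` fields default to empty lists and that both outputs carry identical `filters`, consistent with the checks above; the path is made up):

out_a = TaskOutput(
    image_list_updates=[SingleImage(path="/zarr/plate.zarr/A/01/0")]
)
out_b = TaskOutput(
    image_list_updates=[SingleImage(path="/zarr/plate.zarr/A/01/0")]
)
merged = merge_outputs([out_a, out_b])
# The duplicated update is removed by deduplicate_list
assert len(merged.image_list_updates) == 1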
@@ -0,0 +1,264 @@
+ import json
+ from concurrent.futures import ThreadPoolExecutor
+ from copy import copy
+ from copy import deepcopy
+ from pathlib import Path
+ from typing import Callable
+ from typing import Optional
+
+ from ....images import Filters
+ from ....images import SingleImage
+ from ....images.tools import _filter_image_list
+ from ....images.tools import find_image_by_path
+ from ....images.tools import match_filter
+ from ..filenames import FILTERS_FILENAME
+ from ..filenames import HISTORY_FILENAME
+ from ..filenames import IMAGES_FILENAME
+ from .runner_functions import no_op_submit_setup_call
+ from .runner_functions import run_v1_task_parallel
+ from .runner_functions import run_v2_task_compound
+ from .runner_functions import run_v2_task_non_parallel
+ from .runner_functions import run_v2_task_parallel
+ from fractal_server.app.models.v2 import DatasetV2
+ from fractal_server.app.models.v2 import WorkflowTaskV2
+ from fractal_server.app.schemas.v2.dataset import _DatasetHistoryItemV2
+ from fractal_server.app.schemas.v2.workflowtask import WorkflowTaskStatusTypeV2
+
+ # FIXME: define RESERVED_ARGUMENTS = [", ...]
+
+
+ def execute_tasks_v2(
+     wf_task_list: list[WorkflowTaskV2],
+     dataset: DatasetV2,
+     executor: ThreadPoolExecutor,
+     workflow_dir: Path,
+     workflow_dir_user: Optional[Path] = None,
+     logger_name: Optional[str] = None,
+     submit_setup_call: Callable = no_op_submit_setup_call,
+ ) -> DatasetV2:
+
+     if not workflow_dir.exists():  # FIXME: this should have already happened
+         workflow_dir.mkdir()
+
+     # Initialize local dataset attributes
+     zarr_dir = dataset.zarr_dir
+     tmp_images = deepcopy(dataset.images)
+     tmp_filters = deepcopy(dataset.filters)
+     tmp_history = []
+
+     for wftask in wf_task_list:
+         task = wftask.task
+
+         # PRE TASK EXECUTION
+
+         # Get filtered images
+         pre_type_filters = copy(tmp_filters["types"])
+         pre_type_filters.update(wftask.input_filters["types"])
+         pre_attribute_filters = copy(tmp_filters["attributes"])
+         pre_attribute_filters.update(wftask.input_filters["attributes"])
+         filtered_images = _filter_image_list(
+             images=tmp_images,
+             filters=Filters(
+                 types=pre_type_filters,
+                 attributes=pre_attribute_filters,
+             ),
+         )
+         # Verify that filtered images comply with task input_types
+         for image in filtered_images:
+             if not match_filter(image, Filters(types=task.input_types)):
+                 raise ValueError(
+                     f"Filtered images include {image.dict()}, which does "
+                     f"not comply with {task.input_types=}."
+                 )
+
+         # TASK EXECUTION (V2)
+         if not wftask.is_legacy_task:
+             if task.type == "non_parallel":
+                 current_task_output = run_v2_task_non_parallel(
+                     images=filtered_images,
+                     zarr_dir=zarr_dir,
+                     wftask=wftask,
+                     task=wftask.task,
+                     workflow_dir=workflow_dir,
+                     workflow_dir_user=workflow_dir_user,
+                     executor=executor,
+                     logger_name=logger_name,
+                     submit_setup_call=submit_setup_call,
+                 )
+             elif task.type == "parallel":
+                 current_task_output = run_v2_task_parallel(
+                     images=filtered_images,
+                     wftask=wftask,
+                     task=wftask.task,
+                     workflow_dir=workflow_dir,
+                     workflow_dir_user=workflow_dir_user,
+                     executor=executor,
+                     logger_name=logger_name,
+                     submit_setup_call=submit_setup_call,
+                 )
+             elif task.type == "compound":
+                 current_task_output = run_v2_task_compound(
+                     images=filtered_images,
+                     zarr_dir=zarr_dir,
+                     wftask=wftask,
+                     task=wftask.task,
+                     workflow_dir=workflow_dir,
+                     workflow_dir_user=workflow_dir_user,
+                     executor=executor,
+                     logger_name=logger_name,
+                     submit_setup_call=submit_setup_call,
+                 )
+             else:
+                 raise ValueError(f"Invalid {task.type=}.")
+         # TASK EXECUTION (V1)
+         else:
+             current_task_output = run_v1_task_parallel(
+                 images=filtered_images,
+                 wftask=wftask,
+                 task_legacy=wftask.task_legacy,
+                 executor=executor,
+                 logger_name=logger_name,
+                 submit_setup_call=submit_setup_call,
+             )
+
+         # POST TASK EXECUTION
+
+         # Update image list
+         current_task_output.check_paths_are_unique()
+         for image_obj in current_task_output.image_list_updates:
+             image = image_obj.dict()
+             # Edit existing image
+             if image["path"] in [_image["path"] for _image in tmp_images]:
+                 if (
+                     image["origin"] is not None
+                     and image["origin"] != image["path"]
+                 ):
+                     raise ValueError(
+                         f"Trying to edit an image with {image['path']=} "
+                         f"and {image['origin']=}."
+                     )
+                 image_search = find_image_by_path(
+                     images=tmp_images,
+                     path=image["path"],
+                 )
+                 if image_search is None:
+                     raise ValueError(
+                         f"Image with path {image['path']} not found, while "
+                         "updating image list."
+                     )
+                 original_img = image_search["image"]
+                 original_index = image_search["index"]
+                 updated_attributes = copy(original_img["attributes"])
+                 updated_types = copy(original_img["types"])
+
+                 # Update image attributes/types with task output and manifest
+                 updated_attributes.update(image["attributes"])
+                 updated_types.update(image["types"])
+                 updated_types.update(task.output_types)
+
+                 # Update image in the dataset image list
+                 tmp_images[original_index]["attributes"] = updated_attributes
+                 tmp_images[original_index]["types"] = updated_types
+             # Add new image
+             else:
+                 # Check that image['path'] is relative to zarr_dir
+                 if not image["path"].startswith(zarr_dir):
+                     raise ValueError(
+                         f"{zarr_dir} is not a parent directory of "
+                         f"{image['path']}"
+                     )
+                 # Propagate attributes and types from `origin` (if any)
+                 updated_attributes = {}
+                 updated_types = {}
+                 if image["origin"] is not None:
+                     image_search = find_image_by_path(
+                         images=tmp_images,
+                         path=image["origin"],
+                     )
+                     if image_search is not None:
+                         original_img = image_search["image"]
+                         updated_attributes = copy(original_img["attributes"])
+                         updated_types = copy(original_img["types"])
+                 # Update image attributes/types with task output and manifest
+                 updated_attributes.update(image["attributes"])
+                 updated_types.update(image["types"])
+                 updated_types.update(task.output_types)
+                 new_image = SingleImage(
+                     path=image["path"],
+                     origin=image["origin"],
+                     attributes=updated_attributes,
+                     types=updated_types,
+                 )
+                 # Add image into the dataset image list
+                 tmp_images.append(new_image.dict())
+
+         # Remove images from tmp_images
+         for image in current_task_output.image_list_removals:
+             image_search = find_image_by_path(
+                 images=tmp_images, path=image["path"]
+             )
+             if image_search["index"] is None:
+                 raise ValueError(
+                     f"Cannot remove image with path {image['path']}, since "
+                     "it is not part of the current image list."
+                 )
+             else:
+                 tmp_images.pop(image_search["index"])
+
+         # Update filters.attributes:
+         # current + (task_output: not really, in current examples..)
+         if current_task_output.filters is not None:
+             tmp_filters["attributes"].update(
+                 current_task_output.filters.attributes
+             )
+
+         # Update filters.types: current + (task_output + task_manifest)
+         if wftask.is_legacy_task:
+             types_from_manifest = {}
+         else:
+             types_from_manifest = task.output_types
+         if current_task_output.filters is not None:
+             types_from_task = current_task_output.filters.types
+         else:
+             types_from_task = {}
+         # Check that key sets are disjoint
+         set_types_from_manifest = set(types_from_manifest.keys())
+         set_types_from_task = set(types_from_task.keys())
+         if not set_types_from_manifest.isdisjoint(set_types_from_task):
+             overlap = set_types_from_manifest.intersection(set_types_from_task)
+             raise ValueError(
+                 "Both task and task manifest set the same "
+                 f"output type. Overlapping keys: {overlap}."
+             )
+         # Update filters.types
+         tmp_filters["types"].update(types_from_manifest)
+         tmp_filters["types"].update(types_from_task)
+
+         # Update history (based on _DatasetHistoryItemV2)
+         history_item = _DatasetHistoryItemV2(
+             workflowtask=wftask,
+             status=WorkflowTaskStatusTypeV2.DONE,
+             parallelization=dict(
+                 # task_type=wftask.task.type,  # FIXME: breaks for V1 tasks
+                 # component_list=fil,  # FIXME
+             ),
+         ).dict()
+         tmp_history.append(history_item)
+
+         # Write current dataset attributes (history, images, filters) into
+         # temporary files, which can be used (1) to retrieve the latest state
+         # when the job fails, (2) from within endpoints that need up-to-date
+         # information
+         with open(workflow_dir / HISTORY_FILENAME, "w") as f:
+             json.dump(tmp_history, f, indent=2)
+         with open(workflow_dir / FILTERS_FILENAME, "w") as f:
+             json.dump(tmp_filters, f, indent=2)
+         with open(workflow_dir / IMAGES_FILENAME, "w") as f:
+             json.dump(tmp_images, f, indent=2)
+
+     # NOTE: tmp_history only contains the newly-added history items (to be
+     # appended to the original history), while tmp_filters and tmp_images
+     # represent the new attributes (to replace the original ones)
+     result = dict(
+         history=tmp_history,
+         filters=tmp_filters,
+         images=tmp_images,
+     )
+     return result
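
The pre-task filter merge above gives WorkflowTask-level `input_filters` precedence over the dataset-level filters accumulated so far. A toy example of that precedence, with made-up values:

tmp_filters = {"types": {"is_3D": True}, "attributes": {"plate": "p1.zarr"}}
input_filters = {"types": {"is_3D": False}, "attributes": {}}

pre_type_filters = dict(tmp_filters["types"])
pre_type_filters.update(input_filters["types"])
# pre_type_filters == {"is_3D": False}: the WorkflowTask-level filter wins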