fractal-server 2.14.0a33__py3-none-any.whl → 2.14.0a35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/runner/executors/base_runner.py +4 -0
- fractal_server/app/runner/executors/local/runner.py +97 -35
- fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py +327 -253
- fractal_server/app/runner/executors/slurm_common/remote.py +14 -11
- fractal_server/app/runner/executors/slurm_ssh/runner.py +66 -6
- fractal_server/app/runner/executors/slurm_sudo/_subprocess_run_as_user.py +0 -15
- fractal_server/app/runner/executors/slurm_sudo/runner.py +13 -1
- fractal_server/app/runner/v2/runner.py +3 -0
- fractal_server/app/runner/v2/runner_functions.py +7 -0
- fractal_server/ssh/_fabric.py +24 -12
- {fractal_server-2.14.0a33.dist-info → fractal_server-2.14.0a35.dist-info}/METADATA +1 -1
- {fractal_server-2.14.0a33.dist-info → fractal_server-2.14.0a35.dist-info}/RECORD +16 -16
- {fractal_server-2.14.0a33.dist-info → fractal_server-2.14.0a35.dist-info}/LICENSE +0 -0
- {fractal_server-2.14.0a33.dist-info → fractal_server-2.14.0a35.dist-info}/WHEEL +0 -0
- {fractal_server-2.14.0a33.dist-info → fractal_server-2.14.0a35.dist-info}/entry_points.txt +0 -0
fractal_server/__init__.py
CHANGED
@@ -1 +1 @@
-__VERSION__ = "2.14.0a33"
+__VERSION__ = "2.14.0a35"

fractal_server/app/runner/executors/base_runner.py
CHANGED
@@ -32,6 +32,7 @@ class BaseRunner(object):
         task_type: TaskTypeType,
         task_files: TaskFiles,
         config: Any,
+        user_id: int,
     ) -> tuple[Any, BaseException]:
         """
         Run a single fractal task.
@@ -44,6 +45,7 @@ class BaseRunner(object):
             task_type: Task type.
             task_files: `TaskFiles` object.
             config: Runner-specific parameters.
+            user_id:
         """
         raise NotImplementedError()

@@ -55,6 +57,7 @@ class BaseRunner(object):
         list_task_files: list[TaskFiles],
         task_type: TaskTypeType,
         config: Any,
+        user_id: int,
     ) -> tuple[dict[int, Any], dict[int, BaseException]]:
         """
         Run a parallel fractal task.
@@ -68,6 +71,7 @@ class BaseRunner(object):
             task_type: Task type.
             task_files: `TaskFiles` object.
             config: Runner-specific parameters.
+            user_id
         """
         raise NotImplementedError()

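
The four base_runner.py hunks above add a required user_id: int argument to the BaseRunner.submit and BaseRunner.multisubmit signatures (and mention it in their docstrings). Below is a minimal standalone sketch of the shape of this interface after the change; MiniRunner, InProcessRunner, and the simplified parameter types are illustrative stand-ins, not fractal-server's actual classes.

from typing import Any, Callable


class MiniRunner:
    """Illustrative stand-in for the BaseRunner interface after this change."""

    def submit(
        self,
        func: Callable[..., Any],
        parameters: dict[str, Any],
        task_type: str,
        task_files: Any,
        config: Any,
        user_id: int,
    ) -> tuple[Any, BaseException | None]:
        raise NotImplementedError()


class InProcessRunner(MiniRunner):
    """Trivial concrete runner that calls the function in the current process."""

    def submit(self, func, parameters, task_type, task_files, config, user_id):
        try:
            return func(**parameters), None
        except Exception as e:
            return None, e


if __name__ == "__main__":
    runner = InProcessRunner()
    result, exception = runner.submit(
        func=lambda x: x * 2,
        parameters={"x": 21},
        task_type="non_parallel",
        task_files=None,
        config=None,
        user_id=1,  # the argument added in this release
    )
    print(result, exception)  # 42 None
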

fractal_server/app/runner/executors/local/runner.py
CHANGED
@@ -9,6 +9,9 @@ from fractal_server.app.db import get_sync_db
 from fractal_server.app.runner.exceptions import TaskExecutionError
 from fractal_server.app.runner.executors.base_runner import BaseRunner
 from fractal_server.app.runner.task_files import TaskFiles
+from fractal_server.app.runner.v2.db_tools import (
+    bulk_update_status_of_history_unit,
+)
 from fractal_server.app.runner.v2.db_tools import update_status_of_history_unit
 from fractal_server.app.schemas.v2 import HistoryUnitStatus
 from fractal_server.logger import set_logger
@@ -55,19 +58,35 @@ class LocalRunner(BaseRunner):
             "converter_compound",
         ],
         config: LocalBackendConfig,
+        user_id: int,
     ) -> tuple[Any, Exception]:
         logger.debug("[submit] START")

-
-
-
+        try:
+            self.validate_submit_parameters(parameters, task_type=task_type)
+            workdir_local = task_files.wftask_subfolder_local
+            workdir_local.mkdir()

-
-
-
-
-
-
+            # SUBMISSION PHASE
+            future = self.executor.submit(
+                func,
+                parameters=parameters,
+                remote_files=task_files.remote_files_dict,
+            )
+        except Exception as e:
+            logger.error(
+                "[submit] Unexpected exception during submission. "
+                f"Original error {str(e)}"
+            )
+            result = None
+            exception = TaskExecutionError(str(e))
+            with next(get_sync_db()) as db:
+                update_status_of_history_unit(
+                    history_unit_id=history_unit_id,
+                    status=HistoryUnitStatus.FAILED,
+                    db_sync=db,
+                )
+            return None, exception

         # RETRIEVAL PHASE
         with next(get_sync_db()) as db:
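
In the hunk above, the submission phase of LocalRunner.submit is wrapped in a try/except: any failure before a future exists is turned into a TaskExecutionError, the corresponding history unit is marked FAILED, and the method returns early. Below is a standalone sketch of the same fail-and-record pattern, where TaskError and mark_unit_failed are illustrative stand-ins for TaskExecutionError and the update_status_of_history_unit database call.

from concurrent.futures import ThreadPoolExecutor


class TaskError(Exception):
    """Illustrative stand-in for TaskExecutionError."""


def mark_unit_failed(history_unit_id: int) -> None:
    # Stand-in for update_status_of_history_unit(..., status=FAILED, ...)
    print(f"history unit {history_unit_id} -> FAILED")


def submit_one(executor, func, parameters: dict, history_unit_id: int):
    # SUBMISSION PHASE: anything raised here means the task never started
    try:
        future = executor.submit(func, **parameters)
    except Exception as e:
        mark_unit_failed(history_unit_id)
        return None, TaskError(str(e))

    # RETRIEVAL PHASE: wait for the result, converting failures to TaskError
    try:
        return future.result(), None
    except Exception as e:
        mark_unit_failed(history_unit_id)
        return None, TaskError(str(e))


if __name__ == "__main__":
    with ThreadPoolExecutor() as pool:
        print(submit_one(pool, lambda x: x + 1, {"x": 1}, history_unit_id=7))
        print(submit_one(pool, lambda: 1 / 0, {}, history_unit_id=8))
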
@@ -98,6 +117,7 @@ class LocalRunner(BaseRunner):
         list_task_files: list[TaskFiles],
         task_type: Literal["parallel", "compound", "converter_compound"],
         config: LocalBackendConfig,
+        user_id: int,
     ) -> tuple[dict[int, Any], dict[int, BaseException]]:
         """
         Note: `list_parameters`, `list_task_files` and `history_unit_ids`
@@ -105,29 +125,50 @@ class LocalRunner(BaseRunner):
         input images, while for compound tasks these can differ.
         """

-        self.validate_multisubmit_parameters(
-            list_parameters=list_parameters,
-            task_type=task_type,
-            list_task_files=list_task_files,
-            history_unit_ids=history_unit_ids,
-        )
-
         logger.debug(f"[multisubmit] START, {len(list_parameters)=}")
+        results: dict[int, Any] = {}
+        exceptions: dict[int, BaseException] = {}

-
-
-
-
-
-
-
-
-
+        try:
+
+            self.validate_multisubmit_parameters(
+                list_parameters=list_parameters,
+                task_type=task_type,
+                list_task_files=list_task_files,
+                history_unit_ids=history_unit_ids,
+            )
+
+            workdir_local = list_task_files[0].wftask_subfolder_local
+            if task_type == "parallel":
+                workdir_local.mkdir()
+
+            # Set `n_elements` and `parallel_tasks_per_job`
+            n_elements = len(list_parameters)
+            parallel_tasks_per_job = config.parallel_tasks_per_job
+            if parallel_tasks_per_job is None:
+                parallel_tasks_per_job = n_elements
+
+        except Exception as e:
+            logger.error(
+                "[multisubmit] Unexpected exception during preliminary phase. "
+                f"Original error {str(e)}"
+            )
+            exception = TaskExecutionError(str(e))
+            exceptions = {
+                ind: exception for ind in range(len(list_parameters))
+            }
+            if task_type == "parallel":
+                with next(get_sync_db()) as db:
+                    bulk_update_status_of_history_unit(
+                        history_unit_ids=history_unit_ids,
+                        status=HistoryUnitStatus.FAILED,
+                        db_sync=db,
+                    )
+            return results, exceptions

         # Execute tasks, in chunks of size `parallel_tasks_per_job`
-        results: dict[int, Any] = {}
-        exceptions: dict[int, BaseException] = {}
         for ind_chunk in range(0, n_elements, parallel_tasks_per_job):
+
             list_parameters_chunk = list_parameters[
                 ind_chunk : ind_chunk + parallel_tasks_per_job
             ]
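
The hunk above moves validation and working-directory setup into a try block, pre-populates results and exceptions, and on a preliminary failure assigns the same TaskExecutionError to every index and (for parallel tasks) calls bulk_update_status_of_history_unit once for all affected history units. The surviving loop still submits work in chunks of config.parallel_tasks_per_job, defaulting to all elements at once when that setting is unset. Below is a standalone sketch of that chunked-submission loop, using a ThreadPoolExecutor stand-in rather than fractal-server's executor.

from concurrent.futures import Future, ThreadPoolExecutor
from typing import Any


def chunked_multisubmit(
    func,
    list_parameters: list[dict[str, Any]],
    parallel_tasks_per_job: int | None = None,
) -> tuple[dict[int, Any], dict[int, BaseException]]:
    results: dict[int, Any] = {}
    exceptions: dict[int, BaseException] = {}

    n_elements = len(list_parameters)
    if parallel_tasks_per_job is None:
        # Same default as the runner: one chunk containing every element
        parallel_tasks_per_job = n_elements

    with ThreadPoolExecutor() as executor:
        # Submit and wait for tasks in chunks of size `parallel_tasks_per_job`
        for ind_chunk in range(0, n_elements, parallel_tasks_per_job):
            chunk = list_parameters[ind_chunk : ind_chunk + parallel_tasks_per_job]
            futures: dict[int, Future] = {
                ind_chunk + i: executor.submit(func, **kwargs)
                for i, kwargs in enumerate(chunk)
            }
            for positional_index, future in futures.items():
                try:
                    results[positional_index] = future.result()
                except Exception as e:
                    exceptions[positional_index] = e

    return results, exceptions


if __name__ == "__main__":
    res, exc = chunked_multisubmit(
        lambda x: 10 // x,
        [{"x": 1}, {"x": 0}, {"x": 5}],
        parallel_tasks_per_job=2,
    )
    print(res)  # {0: 10, 2: 2}
    print(exc)  # {1: ZeroDivisionError(...)}
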
@@ -135,15 +176,31 @@ class LocalRunner(BaseRunner):
             active_futures: dict[int, Future] = {}
             for ind_within_chunk, kwargs in enumerate(list_parameters_chunk):
                 positional_index = ind_chunk + ind_within_chunk
-
-
-
-
+                try:
+                    future = self.executor.submit(
+                        func,
+                        parameters=kwargs,
+                        remote_files=list_task_files[
+                            positional_index
+                        ].remote_files_dict,
+                    )
+                    active_futures[positional_index] = future
+                except Exception as e:
+                    logger.error(
+                        "[multisubmit] Unexpected exception during submission."
+                        f" Original error {str(e)}"
+                    )
+                    current_history_unit_id = history_unit_ids[
                         positional_index
-                ]
-
-
-
+                    ]
+                    exceptions[positional_index] = TaskExecutionError(str(e))
+                    if task_type == "parallel":
+                        with next(get_sync_db()) as db:
+                            update_status_of_history_unit(
+                                history_unit_id=current_history_unit_id,
+                                status=HistoryUnitStatus.FAILED,
+                                db_sync=db,
+                            )
             while active_futures:
                 finished_futures = [
                     index_and_future
@@ -171,6 +228,11 @@ class LocalRunner(BaseRunner):
                         )

                     except Exception as e:
+                        logger.debug(
+                            "Multisubmit failed in retrieval "
+                            "phase with the following error "
+                            f"{str(e)}"
+                        )
                         exceptions[positional_index] = TaskExecutionError(
                             str(e)
                         )