fractal-server 2.13.0__py3-none-any.whl → 2.14.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/history/__init__.py +4 -0
- fractal_server/app/history/image_updates.py +142 -0
- fractal_server/app/history/status_enum.py +16 -0
- fractal_server/app/models/v2/__init__.py +9 -1
- fractal_server/app/models/v2/accounting.py +35 -0
- fractal_server/app/models/v2/history.py +53 -0
- fractal_server/app/routes/admin/v2/__init__.py +4 -0
- fractal_server/app/routes/admin/v2/accounting.py +108 -0
- fractal_server/app/routes/admin/v2/impersonate.py +35 -0
- fractal_server/app/routes/admin/v2/job.py +5 -13
- fractal_server/app/routes/admin/v2/task_group.py +4 -12
- fractal_server/app/routes/api/v2/__init__.py +2 -2
- fractal_server/app/routes/api/v2/_aux_functions.py +78 -0
- fractal_server/app/routes/api/v2/_aux_functions_task_lifecycle.py +3 -3
- fractal_server/app/routes/api/v2/dataset.py +12 -9
- fractal_server/app/routes/api/v2/history.py +247 -0
- fractal_server/app/routes/api/v2/submit.py +1 -0
- fractal_server/app/routes/api/v2/task_group.py +2 -5
- fractal_server/app/routes/api/v2/workflow.py +18 -3
- fractal_server/app/routes/api/v2/workflowtask.py +22 -0
- fractal_server/app/routes/aux/__init__.py +0 -20
- fractal_server/app/runner/executors/base_runner.py +114 -0
- fractal_server/app/runner/{v2/_local → executors/local}/_local_config.py +3 -3
- fractal_server/app/runner/executors/local/_submit_setup.py +54 -0
- fractal_server/app/runner/executors/local/runner.py +200 -0
- fractal_server/app/runner/executors/{slurm → slurm_common}/_batching.py +1 -1
- fractal_server/app/runner/executors/{slurm → slurm_common}/_slurm_config.py +3 -3
- fractal_server/app/runner/{v2/_slurm_ssh → executors/slurm_common}/_submit_setup.py +13 -12
- fractal_server/app/runner/{v2/_slurm_common → executors/slurm_common}/get_slurm_config.py +9 -15
- fractal_server/app/runner/executors/{slurm/ssh → slurm_ssh}/_executor_wait_thread.py +1 -1
- fractal_server/app/runner/executors/{slurm/ssh → slurm_ssh}/_slurm_job.py +1 -1
- fractal_server/app/runner/executors/{slurm/ssh → slurm_ssh}/executor.py +13 -14
- fractal_server/app/runner/executors/{slurm/sudo → slurm_sudo}/_check_jobs_status.py +11 -9
- fractal_server/app/runner/executors/{slurm/sudo → slurm_sudo}/_executor_wait_thread.py +3 -3
- fractal_server/app/runner/executors/{slurm/sudo → slurm_sudo}/_subprocess_run_as_user.py +2 -68
- fractal_server/app/runner/executors/slurm_sudo/runner.py +632 -0
- fractal_server/app/runner/task_files.py +70 -96
- fractal_server/app/runner/v2/__init__.py +9 -19
- fractal_server/app/runner/v2/_local.py +84 -0
- fractal_server/app/runner/v2/{_slurm_ssh/__init__.py → _slurm_ssh.py} +12 -13
- fractal_server/app/runner/v2/{_slurm_sudo/__init__.py → _slurm_sudo.py} +12 -12
- fractal_server/app/runner/v2/runner.py +106 -31
- fractal_server/app/runner/v2/runner_functions.py +88 -64
- fractal_server/app/runner/v2/runner_functions_low_level.py +20 -20
- fractal_server/app/schemas/v2/__init__.py +1 -0
- fractal_server/app/schemas/v2/accounting.py +18 -0
- fractal_server/app/schemas/v2/dataset.py +0 -17
- fractal_server/app/schemas/v2/history.py +23 -0
- fractal_server/config.py +58 -52
- fractal_server/migrations/versions/8223fcef886c_image_status.py +63 -0
- fractal_server/migrations/versions/87cd72a537a2_add_historyitem_table.py +68 -0
- fractal_server/migrations/versions/af1ef1c83c9b_add_accounting_tables.py +57 -0
- fractal_server/tasks/v2/utils_background.py +1 -1
- {fractal_server-2.13.0.dist-info → fractal_server-2.14.0a0.dist-info}/METADATA +1 -1
- {fractal_server-2.13.0.dist-info → fractal_server-2.14.0a0.dist-info}/RECORD +66 -55
- fractal_server/app/routes/api/v2/status.py +0 -168
- fractal_server/app/runner/executors/slurm/sudo/executor.py +0 -1281
- fractal_server/app/runner/v2/_local/__init__.py +0 -129
- fractal_server/app/runner/v2/_local/_submit_setup.py +0 -52
- fractal_server/app/runner/v2/_local/executor.py +0 -100
- fractal_server/app/runner/v2/_slurm_sudo/_submit_setup.py +0 -83
- fractal_server/app/runner/v2/handle_failed_job.py +0 -59
- /fractal_server/app/runner/executors/{slurm → local}/__init__.py +0 -0
- /fractal_server/app/runner/executors/{slurm/ssh → slurm_common}/__init__.py +0 -0
- /fractal_server/app/runner/executors/{_job_states.py → slurm_common/_job_states.py} +0 -0
- /fractal_server/app/runner/executors/{slurm → slurm_common}/remote.py +0 -0
- /fractal_server/app/runner/executors/{slurm → slurm_common}/utils_executors.py +0 -0
- /fractal_server/app/runner/executors/{slurm/sudo → slurm_ssh}/__init__.py +0 -0
- /fractal_server/app/runner/{v2/_slurm_common → executors/slurm_sudo}/__init__.py +0 -0
- {fractal_server-2.13.0.dist-info → fractal_server-2.14.0a0.dist-info}/LICENSE +0 -0
- {fractal_server-2.13.0.dist-info → fractal_server-2.14.0a0.dist-info}/WHEEL +0 -0
- {fractal_server-2.13.0.dist-info → fractal_server-2.14.0a0.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,5 @@
|
|
1
|
+
import json
|
1
2
|
import logging
|
2
|
-
from concurrent.futures import ThreadPoolExecutor
|
3
3
|
from copy import copy
|
4
4
|
from copy import deepcopy
|
5
5
|
from pathlib import Path
|
@@ -18,10 +18,14 @@ from .runner_functions import run_v2_task_non_parallel
|
|
18
18
|
from .runner_functions import run_v2_task_parallel
|
19
19
|
from .task_interface import TaskOutput
|
20
20
|
from fractal_server.app.db import get_sync_db
|
21
|
+
from fractal_server.app.history.status_enum import HistoryItemImageStatus
|
22
|
+
from fractal_server.app.models.v2 import AccountingRecord
|
21
23
|
from fractal_server.app.models.v2 import DatasetV2
|
24
|
+
from fractal_server.app.models.v2 import HistoryItemV2
|
25
|
+
from fractal_server.app.models.v2 import ImageStatus
|
26
|
+
from fractal_server.app.models.v2 import TaskGroupV2
|
22
27
|
from fractal_server.app.models.v2 import WorkflowTaskV2
|
23
|
-
from fractal_server.app.
|
24
|
-
from fractal_server.app.schemas.v2.workflowtask import WorkflowTaskStatusTypeV2
|
28
|
+
from fractal_server.app.runner.executors.base_runner import BaseRunner
|
25
29
|
from fractal_server.images.models import AttributeFiltersType
|
26
30
|
from fractal_server.images.tools import merge_type_filters
|
27
31
|
|
@@ -30,7 +34,8 @@ def execute_tasks_v2(
|
|
30
34
|
*,
|
31
35
|
wf_task_list: list[WorkflowTaskV2],
|
32
36
|
dataset: DatasetV2,
|
33
|
-
|
37
|
+
runner: BaseRunner,
|
38
|
+
user_id: int,
|
34
39
|
workflow_dir_local: Path,
|
35
40
|
workflow_dir_remote: Optional[Path] = None,
|
36
41
|
logger_name: Optional[str] = None,
|
@@ -41,8 +46,8 @@ def execute_tasks_v2(
|
|
41
46
|
|
42
47
|
if not workflow_dir_local.exists():
|
43
48
|
logger.warning(
|
44
|
-
f"Now creating {workflow_dir_local}, "
|
45
|
-
"
|
49
|
+
f"Now creating {workflow_dir_local}, but it "
|
50
|
+
"should have already happened."
|
46
51
|
)
|
47
52
|
workflow_dir_local.mkdir()
|
48
53
|
|
@@ -58,69 +63,116 @@ def execute_tasks_v2(
|
|
58
63
|
|
59
64
|
# PRE TASK EXECUTION
|
60
65
|
|
61
|
-
#
|
66
|
+
# Filter images by types and attributes (in two steps)
|
62
67
|
type_filters = copy(current_dataset_type_filters)
|
63
68
|
type_filters_patch = merge_type_filters(
|
64
69
|
task_input_types=task.input_types,
|
65
70
|
wftask_type_filters=wftask.type_filters,
|
66
71
|
)
|
67
72
|
type_filters.update(type_filters_patch)
|
68
|
-
|
73
|
+
type_filtered_images = filter_image_list(
|
69
74
|
images=tmp_images,
|
70
75
|
type_filters=type_filters,
|
76
|
+
attribute_filters=None,
|
77
|
+
)
|
78
|
+
filtered_images = filter_image_list(
|
79
|
+
images=type_filtered_images,
|
80
|
+
type_filters=None,
|
71
81
|
attribute_filters=job_attribute_filters,
|
72
82
|
)
|
73
83
|
|
74
|
-
#
|
84
|
+
# Create history item
|
75
85
|
with next(get_sync_db()) as db:
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
86
|
+
workflowtask_dump = dict(
|
87
|
+
**wftask.model_dump(exclude={"task"}),
|
88
|
+
task=wftask.task.model_dump(),
|
89
|
+
)
|
90
|
+
# Exclude timestamps since they'd need to be serialized properly
|
91
|
+
task_group = db.get(TaskGroupV2, wftask.task.taskgroupv2_id)
|
92
|
+
task_group_dump = task_group.model_dump(
|
93
|
+
exclude={
|
94
|
+
"timestamp_created",
|
95
|
+
"timestamp_last_used",
|
96
|
+
}
|
97
|
+
)
|
98
|
+
parameters_hash = str(
|
99
|
+
hash(
|
100
|
+
json.dumps(
|
101
|
+
[workflowtask_dump, task_group_dump],
|
102
|
+
sort_keys=True,
|
103
|
+
indent=None,
|
104
|
+
).encode("utf-8")
|
105
|
+
)
|
106
|
+
)
|
107
|
+
images = {
|
108
|
+
image["zarr_url"]: HistoryItemImageStatus.SUBMITTED
|
109
|
+
for image in filtered_images
|
110
|
+
}
|
111
|
+
history_item = HistoryItemV2(
|
112
|
+
dataset_id=dataset.id,
|
113
|
+
workflowtask_id=wftask.id,
|
114
|
+
workflowtask_dump=workflowtask_dump,
|
115
|
+
task_group_dump=task_group_dump,
|
116
|
+
parameters_hash=parameters_hash,
|
117
|
+
num_available_images=len(type_filtered_images),
|
118
|
+
num_current_images=len(filtered_images),
|
119
|
+
images=images,
|
120
|
+
)
|
121
|
+
db.add(history_item)
|
122
|
+
for image in filtered_images:
|
123
|
+
db.merge(
|
124
|
+
ImageStatus(
|
125
|
+
zarr_url=image["zarr_url"],
|
126
|
+
workflowtask_id=wftask.id,
|
127
|
+
dataset_id=dataset.id,
|
128
|
+
parameters_hash=parameters_hash,
|
129
|
+
status=HistoryItemImageStatus.SUBMITTED,
|
130
|
+
logfile="/placeholder",
|
131
|
+
)
|
132
|
+
)
|
88
133
|
db.commit()
|
134
|
+
db.refresh(history_item)
|
135
|
+
history_item_id = history_item.id
|
136
|
+
|
89
137
|
# TASK EXECUTION (V2)
|
90
138
|
if task.type == "non_parallel":
|
91
|
-
|
139
|
+
(
|
140
|
+
current_task_output,
|
141
|
+
num_tasks,
|
142
|
+
exceptions,
|
143
|
+
) = run_v2_task_non_parallel(
|
92
144
|
images=filtered_images,
|
93
145
|
zarr_dir=zarr_dir,
|
94
146
|
wftask=wftask,
|
95
147
|
task=task,
|
96
148
|
workflow_dir_local=workflow_dir_local,
|
97
149
|
workflow_dir_remote=workflow_dir_remote,
|
98
|
-
executor=
|
99
|
-
logger_name=logger_name,
|
150
|
+
executor=runner,
|
100
151
|
submit_setup_call=submit_setup_call,
|
152
|
+
history_item_id=history_item_id,
|
101
153
|
)
|
102
154
|
elif task.type == "parallel":
|
103
|
-
current_task_output = run_v2_task_parallel(
|
155
|
+
current_task_output, num_tasks, exceptions = run_v2_task_parallel(
|
104
156
|
images=filtered_images,
|
105
157
|
wftask=wftask,
|
106
158
|
task=task,
|
107
159
|
workflow_dir_local=workflow_dir_local,
|
108
160
|
workflow_dir_remote=workflow_dir_remote,
|
109
|
-
executor=
|
110
|
-
logger_name=logger_name,
|
161
|
+
executor=runner,
|
111
162
|
submit_setup_call=submit_setup_call,
|
163
|
+
history_item_id=history_item_id,
|
112
164
|
)
|
113
165
|
elif task.type == "compound":
|
114
|
-
current_task_output = run_v2_task_compound(
|
166
|
+
current_task_output, num_tasks, exceptions = run_v2_task_compound(
|
115
167
|
images=filtered_images,
|
116
168
|
zarr_dir=zarr_dir,
|
117
169
|
wftask=wftask,
|
118
170
|
task=task,
|
119
171
|
workflow_dir_local=workflow_dir_local,
|
120
172
|
workflow_dir_remote=workflow_dir_remote,
|
121
|
-
executor=
|
122
|
-
logger_name=logger_name,
|
173
|
+
executor=runner,
|
123
174
|
submit_setup_call=submit_setup_call,
|
175
|
+
history_item_id=history_item_id,
|
124
176
|
)
|
125
177
|
else:
|
126
178
|
raise ValueError(f"Unexpected error: Invalid {task.type=}.")
|
@@ -144,7 +196,10 @@ def execute_tasks_v2(
|
|
144
196
|
)
|
145
197
|
|
146
198
|
# Update image list
|
199
|
+
num_new_images = 0
|
147
200
|
current_task_output.check_zarr_urls_are_unique()
|
201
|
+
# FIXME: Introduce for loop over task outputs, and processe them sequentially
|
202
|
+
# each failure should lead to an update of the specific image status
|
148
203
|
for image_obj in current_task_output.image_list_updates:
|
149
204
|
image = image_obj.model_dump()
|
150
205
|
# Edit existing image
|
@@ -246,6 +301,7 @@ def execute_tasks_v2(
|
|
246
301
|
SingleImage(**new_image)
|
247
302
|
# Add image into the dataset image list
|
248
303
|
tmp_images.append(new_image)
|
304
|
+
num_new_images += 1
|
249
305
|
|
250
306
|
# Remove images from tmp_images
|
251
307
|
for img_zarr_url in current_task_output.image_list_removals:
|
@@ -269,7 +325,6 @@ def execute_tasks_v2(
|
|
269
325
|
# information
|
270
326
|
with next(get_sync_db()) as db:
|
271
327
|
db_dataset = db.get(DatasetV2, dataset.id)
|
272
|
-
db_dataset.history[-1]["status"] = WorkflowTaskStatusTypeV2.DONE
|
273
328
|
db_dataset.type_filters = current_dataset_type_filters
|
274
329
|
db_dataset.images = tmp_images
|
275
330
|
for attribute_name in [
|
@@ -281,4 +336,24 @@ def execute_tasks_v2(
|
|
281
336
|
db.merge(db_dataset)
|
282
337
|
db.commit()
|
283
338
|
|
339
|
+
# Create accounting record
|
340
|
+
record = AccountingRecord(
|
341
|
+
user_id=user_id,
|
342
|
+
num_tasks=num_tasks,
|
343
|
+
num_new_images=num_new_images,
|
344
|
+
)
|
345
|
+
db.add(record)
|
346
|
+
db.commit()
|
347
|
+
|
348
|
+
if exceptions != {}:
|
349
|
+
logger.error(
|
350
|
+
f'END {wftask.order}-th task (name="{task_name}") '
|
351
|
+
"- ERROR."
|
352
|
+
)
|
353
|
+
# Raise first error
|
354
|
+
for key, value in exceptions.items():
|
355
|
+
raise JobExecutionError(
|
356
|
+
info=(f"An error occurred.\nOriginal error:\n{value}")
|
357
|
+
)
|
358
|
+
|
284
359
|
logger.debug(f'END {wftask.order}-th task (name="{task_name}")')
|
@@ -1,7 +1,6 @@
|
|
1
1
|
import functools
|
2
2
|
import logging
|
3
3
|
import traceback
|
4
|
-
from concurrent.futures import Executor
|
5
4
|
from pathlib import Path
|
6
5
|
from typing import Any
|
7
6
|
from typing import Callable
|
@@ -20,6 +19,7 @@ from fractal_server.app.models.v2 import TaskV2
|
|
20
19
|
from fractal_server.app.models.v2 import WorkflowTaskV2
|
21
20
|
from fractal_server.app.runner.components import _COMPONENT_KEY_
|
22
21
|
from fractal_server.app.runner.components import _index_to_component
|
22
|
+
from fractal_server.app.runner.executors.base_runner import BaseRunner
|
23
23
|
|
24
24
|
|
25
25
|
__all__ = [
|
@@ -59,13 +59,7 @@ def _cast_and_validate_InitTaskOutput(
|
|
59
59
|
)
|
60
60
|
|
61
61
|
|
62
|
-
def no_op_submit_setup_call(
|
63
|
-
*,
|
64
|
-
wftask: WorkflowTaskV2,
|
65
|
-
workflow_dir_local: Path,
|
66
|
-
workflow_dir_remote: Path,
|
67
|
-
which_type: Literal["non_parallel", "parallel"],
|
68
|
-
) -> dict:
|
62
|
+
def no_op_submit_setup_call(*args, **kwargs) -> dict:
|
69
63
|
"""
|
70
64
|
Default (no-operation) interface of submit_setup_call in V2.
|
71
65
|
"""
|
@@ -84,8 +78,8 @@ def _get_executor_options(
|
|
84
78
|
try:
|
85
79
|
options = submit_setup_call(
|
86
80
|
wftask=wftask,
|
87
|
-
|
88
|
-
|
81
|
+
root_dir_local=workflow_dir_local,
|
82
|
+
root_dir_remote=workflow_dir_remote,
|
89
83
|
which_type=which_type,
|
90
84
|
)
|
91
85
|
except Exception as e:
|
@@ -114,10 +108,10 @@ def run_v2_task_non_parallel(
|
|
114
108
|
wftask: WorkflowTaskV2,
|
115
109
|
workflow_dir_local: Path,
|
116
110
|
workflow_dir_remote: Optional[Path] = None,
|
117
|
-
executor:
|
118
|
-
logger_name: Optional[str] = None,
|
111
|
+
executor: BaseRunner,
|
119
112
|
submit_setup_call: Callable = no_op_submit_setup_call,
|
120
|
-
|
113
|
+
history_item_id: int,
|
114
|
+
) -> tuple[TaskOutput, int, dict[int, BaseException]]:
|
121
115
|
"""
|
122
116
|
This runs server-side (see `executor` argument)
|
123
117
|
"""
|
@@ -142,22 +136,29 @@ def run_v2_task_non_parallel(
|
|
142
136
|
zarr_dir=zarr_dir,
|
143
137
|
**(wftask.args_non_parallel or {}),
|
144
138
|
)
|
145
|
-
|
139
|
+
function_kwargs[_COMPONENT_KEY_] = _index_to_component(0)
|
140
|
+
|
141
|
+
result, exception = executor.submit(
|
146
142
|
functools.partial(
|
147
143
|
run_single_task,
|
148
144
|
wftask=wftask,
|
149
145
|
command=task.command_non_parallel,
|
150
|
-
|
151
|
-
|
146
|
+
root_dir_local=workflow_dir_local,
|
147
|
+
root_dir_remote=workflow_dir_remote,
|
152
148
|
),
|
153
|
-
function_kwargs,
|
149
|
+
parameters=function_kwargs,
|
150
|
+
history_item_id=history_item_id,
|
154
151
|
**executor_options,
|
155
152
|
)
|
156
|
-
|
157
|
-
|
158
|
-
|
153
|
+
|
154
|
+
num_tasks = 1
|
155
|
+
if exception is None:
|
156
|
+
if result is None:
|
157
|
+
return (TaskOutput(), num_tasks, {})
|
158
|
+
else:
|
159
|
+
return (_cast_and_validate_TaskOutput(result), num_tasks, {})
|
159
160
|
else:
|
160
|
-
return
|
161
|
+
return (TaskOutput(), num_tasks, {0: exception})
|
161
162
|
|
162
163
|
|
163
164
|
def run_v2_task_parallel(
|
@@ -165,15 +166,15 @@ def run_v2_task_parallel(
|
|
165
166
|
images: list[dict[str, Any]],
|
166
167
|
task: TaskV2,
|
167
168
|
wftask: WorkflowTaskV2,
|
168
|
-
executor:
|
169
|
+
executor: BaseRunner,
|
169
170
|
workflow_dir_local: Path,
|
170
171
|
workflow_dir_remote: Optional[Path] = None,
|
171
|
-
logger_name: Optional[str] = None,
|
172
172
|
submit_setup_call: Callable = no_op_submit_setup_call,
|
173
|
-
|
173
|
+
history_item_id: int,
|
174
|
+
) -> tuple[TaskOutput, int, dict[int, BaseException]]:
|
174
175
|
|
175
176
|
if len(images) == 0:
|
176
|
-
return TaskOutput()
|
177
|
+
return (TaskOutput(), 0, {})
|
177
178
|
|
178
179
|
_check_parallelization_list_size(images)
|
179
180
|
|
@@ -195,29 +196,36 @@ def run_v2_task_parallel(
|
|
195
196
|
)
|
196
197
|
list_function_kwargs[-1][_COMPONENT_KEY_] = _index_to_component(ind)
|
197
198
|
|
198
|
-
|
199
|
+
results, exceptions = executor.multisubmit(
|
199
200
|
functools.partial(
|
200
201
|
run_single_task,
|
201
202
|
wftask=wftask,
|
202
203
|
command=task.command_parallel,
|
203
|
-
|
204
|
-
|
204
|
+
root_dir_local=workflow_dir_local,
|
205
|
+
root_dir_remote=workflow_dir_remote,
|
205
206
|
),
|
206
|
-
list_function_kwargs,
|
207
|
+
list_parameters=list_function_kwargs,
|
208
|
+
history_item_id=history_item_id,
|
207
209
|
**executor_options,
|
208
210
|
)
|
209
|
-
# Explicitly iterate over the whole list, so that all futures are waited
|
210
|
-
outputs = list(results_iterator)
|
211
211
|
|
212
|
-
|
213
|
-
for ind
|
214
|
-
if
|
215
|
-
|
212
|
+
outputs = []
|
213
|
+
for ind in range(len(list_function_kwargs)):
|
214
|
+
if ind in results.keys():
|
215
|
+
result = results[ind]
|
216
|
+
if result is None:
|
217
|
+
output = TaskOutput()
|
218
|
+
else:
|
219
|
+
output = _cast_and_validate_TaskOutput(result)
|
220
|
+
outputs.append(output)
|
221
|
+
elif ind in exceptions.keys():
|
222
|
+
print(f"Bad: {exceptions[ind]}")
|
216
223
|
else:
|
217
|
-
|
224
|
+
print("VERY BAD - should have not reached this point")
|
218
225
|
|
226
|
+
num_tasks = len(images)
|
219
227
|
merged_output = merge_outputs(outputs)
|
220
|
-
return merged_output
|
228
|
+
return (merged_output, num_tasks, exceptions)
|
221
229
|
|
222
230
|
|
223
231
|
def run_v2_task_compound(
|
@@ -226,12 +234,12 @@ def run_v2_task_compound(
|
|
226
234
|
zarr_dir: str,
|
227
235
|
task: TaskV2,
|
228
236
|
wftask: WorkflowTaskV2,
|
229
|
-
executor:
|
237
|
+
executor: BaseRunner,
|
230
238
|
workflow_dir_local: Path,
|
231
239
|
workflow_dir_remote: Optional[Path] = None,
|
232
|
-
logger_name: Optional[str] = None,
|
233
240
|
submit_setup_call: Callable = no_op_submit_setup_call,
|
234
|
-
|
241
|
+
history_item_id: int,
|
242
|
+
) -> tuple[TaskOutput, int, dict[int, BaseException]]:
|
235
243
|
|
236
244
|
executor_options_init = _get_executor_options(
|
237
245
|
wftask=wftask,
|
@@ -254,30 +262,40 @@ def run_v2_task_compound(
|
|
254
262
|
zarr_dir=zarr_dir,
|
255
263
|
**(wftask.args_non_parallel or {}),
|
256
264
|
)
|
257
|
-
|
265
|
+
function_kwargs[_COMPONENT_KEY_] = f"init_{_index_to_component(0)}"
|
266
|
+
result, exception = executor.submit(
|
258
267
|
functools.partial(
|
259
268
|
run_single_task,
|
260
269
|
wftask=wftask,
|
261
270
|
command=task.command_non_parallel,
|
262
|
-
|
263
|
-
|
271
|
+
root_dir_local=workflow_dir_local,
|
272
|
+
root_dir_remote=workflow_dir_remote,
|
264
273
|
),
|
265
|
-
function_kwargs,
|
274
|
+
parameters=function_kwargs,
|
275
|
+
history_item_id=history_item_id,
|
276
|
+
in_compound_task=True,
|
266
277
|
**executor_options_init,
|
267
278
|
)
|
268
|
-
|
269
|
-
|
270
|
-
|
279
|
+
|
280
|
+
num_tasks = 1
|
281
|
+
if exception is None:
|
282
|
+
if result is None:
|
283
|
+
init_task_output = InitTaskOutput()
|
284
|
+
else:
|
285
|
+
init_task_output = _cast_and_validate_InitTaskOutput(result)
|
271
286
|
else:
|
272
|
-
|
287
|
+
return (TaskOutput(), num_tasks, {0: exception})
|
288
|
+
|
273
289
|
parallelization_list = init_task_output.parallelization_list
|
274
290
|
parallelization_list = deduplicate_list(parallelization_list)
|
275
291
|
|
292
|
+
num_tasks = 1 + len(parallelization_list)
|
293
|
+
|
276
294
|
# 3/B: parallel part of a compound task
|
277
295
|
_check_parallelization_list_size(parallelization_list)
|
278
296
|
|
279
297
|
if len(parallelization_list) == 0:
|
280
|
-
return TaskOutput()
|
298
|
+
return (TaskOutput(), 0, {})
|
281
299
|
|
282
300
|
list_function_kwargs = []
|
283
301
|
for ind, parallelization_item in enumerate(parallelization_list):
|
@@ -288,29 +306,35 @@ def run_v2_task_compound(
|
|
288
306
|
**(wftask.args_parallel or {}),
|
289
307
|
),
|
290
308
|
)
|
291
|
-
list_function_kwargs[-1][
|
309
|
+
list_function_kwargs[-1][
|
310
|
+
_COMPONENT_KEY_
|
311
|
+
] = f"compute_{_index_to_component(ind)}"
|
292
312
|
|
293
|
-
|
313
|
+
results, exceptions = executor.multisubmit(
|
294
314
|
functools.partial(
|
295
315
|
run_single_task,
|
296
316
|
wftask=wftask,
|
297
317
|
command=task.command_parallel,
|
298
|
-
|
299
|
-
|
318
|
+
root_dir_local=workflow_dir_local,
|
319
|
+
root_dir_remote=workflow_dir_remote,
|
300
320
|
),
|
301
|
-
list_function_kwargs,
|
321
|
+
list_parameters=list_function_kwargs,
|
322
|
+
history_item_id=history_item_id,
|
323
|
+
in_compound_task=True,
|
302
324
|
**executor_options_compute,
|
303
325
|
)
|
304
|
-
# Explicitly iterate over the whole list, so that all futures are waited
|
305
|
-
outputs = list(results_iterator)
|
306
326
|
|
307
|
-
|
308
|
-
for ind
|
309
|
-
if
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
327
|
+
outputs = []
|
328
|
+
for ind in range(len(list_function_kwargs)):
|
329
|
+
if ind in results.keys():
|
330
|
+
result = results[ind]
|
331
|
+
if result is None:
|
332
|
+
output = TaskOutput()
|
333
|
+
else:
|
334
|
+
output = _cast_and_validate_TaskOutput(result)
|
335
|
+
outputs.append(output)
|
336
|
+
elif ind in exceptions.keys():
|
337
|
+
print(f"Bad: {exceptions[ind]}")
|
314
338
|
|
315
339
|
merged_output = merge_outputs(outputs)
|
316
|
-
return merged_output
|
340
|
+
return (merged_output, num_tasks, exceptions)
|
@@ -11,11 +11,11 @@ from ..components import _COMPONENT_KEY_
|
|
11
11
|
from ..exceptions import JobExecutionError
|
12
12
|
from ..exceptions import TaskExecutionError
|
13
13
|
from fractal_server.app.models.v2 import WorkflowTaskV2
|
14
|
-
from fractal_server.app.runner.task_files import
|
14
|
+
from fractal_server.app.runner.task_files import TaskFiles
|
15
15
|
from fractal_server.string_tools import validate_cmd
|
16
16
|
|
17
17
|
|
18
|
-
def _call_command_wrapper(cmd: str, log_path:
|
18
|
+
def _call_command_wrapper(cmd: str, log_path: str) -> None:
|
19
19
|
"""
|
20
20
|
Call a command and write its stdout and stderr to files
|
21
21
|
|
@@ -50,7 +50,7 @@ def _call_command_wrapper(cmd: str, log_path: Path) -> None:
|
|
50
50
|
raise e
|
51
51
|
|
52
52
|
if result.returncode > 0:
|
53
|
-
with
|
53
|
+
with open(log_path, "r") as fp_stderr:
|
54
54
|
err = fp_stderr.read()
|
55
55
|
raise TaskExecutionError(err)
|
56
56
|
elif result.returncode < 0:
|
@@ -60,49 +60,49 @@ def _call_command_wrapper(cmd: str, log_path: Path) -> None:
|
|
60
60
|
|
61
61
|
|
62
62
|
def run_single_task(
|
63
|
-
|
63
|
+
parameters: dict[str, Any],
|
64
64
|
command: str,
|
65
65
|
wftask: WorkflowTaskV2,
|
66
|
-
|
67
|
-
|
66
|
+
root_dir_local: Path,
|
67
|
+
root_dir_remote: Optional[Path] = None,
|
68
68
|
logger_name: Optional[str] = None,
|
69
69
|
) -> dict[str, Any]:
|
70
70
|
"""
|
71
|
-
Runs within an executor.
|
71
|
+
Runs within an executor (AKA on the SLURM cluster).
|
72
72
|
"""
|
73
73
|
|
74
74
|
logger = logging.getLogger(logger_name)
|
75
75
|
logger.debug(f"Now start running {command=}")
|
76
76
|
|
77
|
-
if not
|
78
|
-
|
77
|
+
if not root_dir_remote:
|
78
|
+
root_dir_remote = root_dir_local
|
79
79
|
|
80
80
|
task_name = wftask.task.name
|
81
81
|
|
82
|
-
component =
|
83
|
-
task_files =
|
84
|
-
|
85
|
-
|
86
|
-
task_order=wftask.order,
|
82
|
+
component = parameters.pop(_COMPONENT_KEY_)
|
83
|
+
task_files = TaskFiles(
|
84
|
+
root_dir_local=root_dir_local,
|
85
|
+
root_dir_remote=root_dir_remote,
|
87
86
|
task_name=task_name,
|
87
|
+
task_order=wftask.order,
|
88
88
|
component=component,
|
89
89
|
)
|
90
90
|
|
91
91
|
# Write arguments to args.json file
|
92
|
-
with task_files.
|
93
|
-
json.dump(
|
92
|
+
with open(task_files.args_file_remote, "w") as f:
|
93
|
+
json.dump(parameters, f, indent=2)
|
94
94
|
|
95
95
|
# Assemble full command
|
96
96
|
full_command = (
|
97
97
|
f"{command} "
|
98
|
-
f"--args-json {task_files.
|
99
|
-
f"--out-json {task_files.
|
98
|
+
f"--args-json {task_files.args_file_remote} "
|
99
|
+
f"--out-json {task_files.metadiff_file_remote}"
|
100
100
|
)
|
101
101
|
|
102
102
|
try:
|
103
103
|
_call_command_wrapper(
|
104
104
|
full_command,
|
105
|
-
log_path=task_files.
|
105
|
+
log_path=task_files.log_file_remote,
|
106
106
|
)
|
107
107
|
except TaskExecutionError as e:
|
108
108
|
e.workflow_task_order = wftask.order
|
@@ -111,7 +111,7 @@ def run_single_task(
|
|
111
111
|
raise e
|
112
112
|
|
113
113
|
try:
|
114
|
-
with task_files.
|
114
|
+
with open(task_files.metadiff_file_remote, "r") as f:
|
115
115
|
out_meta = json.load(f)
|
116
116
|
except FileNotFoundError as e:
|
117
117
|
logger.debug(
|
@@ -0,0 +1,18 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
|
3
|
+
from pydantic import BaseModel
|
4
|
+
from pydantic import field_serializer
|
5
|
+
from pydantic.types import AwareDatetime
|
6
|
+
|
7
|
+
|
8
|
+
class AccountingRecordRead(BaseModel):
|
9
|
+
|
10
|
+
id: int
|
11
|
+
user_id: int
|
12
|
+
timestamp: AwareDatetime
|
13
|
+
num_tasks: int
|
14
|
+
num_new_images: int
|
15
|
+
|
16
|
+
@field_serializer("timestamp")
|
17
|
+
def serialize_datetime(v: datetime) -> str:
|
18
|
+
return v.isoformat()
|
@@ -14,27 +14,12 @@ from .._filter_validators import validate_attribute_filters
|
|
14
14
|
from .._filter_validators import validate_type_filters
|
15
15
|
from .._validators import root_validate_dict_keys
|
16
16
|
from .._validators import valstr
|
17
|
-
from .dumps import WorkflowTaskDumpV2
|
18
17
|
from .project import ProjectReadV2
|
19
|
-
from .workflowtask import WorkflowTaskStatusTypeV2
|
20
18
|
from fractal_server.images import SingleImage
|
21
19
|
from fractal_server.images.models import AttributeFiltersType
|
22
20
|
from fractal_server.urls import normalize_url
|
23
21
|
|
24
22
|
|
25
|
-
class _DatasetHistoryItemV2(BaseModel):
|
26
|
-
"""
|
27
|
-
Class for an item of `Dataset.history`.
|
28
|
-
"""
|
29
|
-
|
30
|
-
workflowtask: WorkflowTaskDumpV2
|
31
|
-
status: WorkflowTaskStatusTypeV2
|
32
|
-
parallelization: Optional[dict] = None
|
33
|
-
|
34
|
-
|
35
|
-
# CRUD
|
36
|
-
|
37
|
-
|
38
23
|
class DatasetCreateV2(BaseModel):
|
39
24
|
model_config = ConfigDict(extra="forbid")
|
40
25
|
|
@@ -74,8 +59,6 @@ class DatasetReadV2(BaseModel):
|
|
74
59
|
project_id: int
|
75
60
|
project: ProjectReadV2
|
76
61
|
|
77
|
-
history: list[_DatasetHistoryItemV2]
|
78
|
-
|
79
62
|
timestamp_created: AwareDatetime
|
80
63
|
|
81
64
|
zarr_dir: str
|