fractal-server 2.10.5__py3-none-any.whl → 2.11.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. fractal_server/__init__.py +1 -1
  2. fractal_server/app/models/v2/dataset.py +9 -6
  3. fractal_server/app/models/v2/job.py +5 -0
  4. fractal_server/app/models/v2/workflowtask.py +5 -8
  5. fractal_server/app/routes/api/v1/dataset.py +2 -2
  6. fractal_server/app/routes/api/v2/_aux_functions.py +3 -10
  7. fractal_server/app/routes/api/v2/images.py +29 -6
  8. fractal_server/app/routes/api/v2/status.py +20 -20
  9. fractal_server/app/routes/api/v2/submit.py +5 -1
  10. fractal_server/app/routes/api/v2/workflowtask.py +3 -3
  11. fractal_server/app/runner/filenames.py +2 -4
  12. fractal_server/app/runner/v1/_common.py +4 -4
  13. fractal_server/app/runner/v1/handle_failed_job.py +4 -4
  14. fractal_server/app/runner/v2/__init__.py +11 -65
  15. fractal_server/app/runner/v2/_local/__init__.py +12 -17
  16. fractal_server/app/runner/v2/_local_experimental/__init__.py +11 -20
  17. fractal_server/app/runner/v2/_slurm_ssh/__init__.py +14 -16
  18. fractal_server/app/runner/v2/_slurm_sudo/__init__.py +12 -14
  19. fractal_server/app/runner/v2/handle_failed_job.py +31 -130
  20. fractal_server/app/runner/v2/merge_outputs.py +13 -16
  21. fractal_server/app/runner/v2/runner.py +63 -72
  22. fractal_server/app/runner/v2/task_interface.py +41 -2
  23. fractal_server/app/schemas/_filter_validators.py +47 -0
  24. fractal_server/app/schemas/_validators.py +13 -2
  25. fractal_server/app/schemas/v2/dataset.py +58 -12
  26. fractal_server/app/schemas/v2/dumps.py +6 -8
  27. fractal_server/app/schemas/v2/job.py +14 -0
  28. fractal_server/app/schemas/v2/task.py +9 -9
  29. fractal_server/app/schemas/v2/task_group.py +2 -2
  30. fractal_server/app/schemas/v2/workflowtask.py +42 -19
  31. fractal_server/data_migrations/2_11_0.py +67 -0
  32. fractal_server/images/__init__.py +0 -1
  33. fractal_server/images/models.py +12 -35
  34. fractal_server/images/tools.py +29 -13
  35. fractal_server/migrations/versions/db09233ad13a_split_filters_and_keep_old_columns.py +96 -0
  36. {fractal_server-2.10.5.dist-info → fractal_server-2.11.0a2.dist-info}/METADATA +1 -1
  37. {fractal_server-2.10.5.dist-info → fractal_server-2.11.0a2.dist-info}/RECORD +40 -37
  38. {fractal_server-2.10.5.dist-info → fractal_server-2.11.0a2.dist-info}/LICENSE +0 -0
  39. {fractal_server-2.10.5.dist-info → fractal_server-2.11.0a2.dist-info}/WHEEL +0 -0
  40. {fractal_server-2.10.5.dist-info → fractal_server-2.11.0a2.dist-info}/entry_points.txt +0 -0
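Most of the changes listed above revolve around splitting the single `filters` object into separate attribute filters and type filters (see `_filter_validators.py`, the `split_filters_and_keep_old_columns` migration, and the `2_11_0` data-migration script), plus threading per-job attribute filters through the v2 runners. A hedged sketch of the before/after shapes; the exact field names and value types are assumptions inferred from the `type_filters` and `job_attribute_filters` names used in the diffs below:

```python
# Assumed shapes, for illustration only (not copied from fractal-server).
from typing import Any

# Pre-2.11: one combined filters object with exact-match attributes and
# boolean types.
old_filters: dict[str, dict[str, Any]] = {
    "attributes": {"well": "A01"},
    "types": {"3D": True},
}

# 2.11: attribute filters and type filters live in separate fields, and each
# attribute filter is assumed to accept a list of allowed values.
attribute_filters: dict[str, list[Any]] = {"well": ["A01", "A02"]}
type_filters: dict[str, bool] = {"3D": True}
```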
@@ -11,6 +11,7 @@ from ...set_start_and_last_task_index import set_start_and_last_task_index
11
11
  from ..runner import execute_tasks_v2
12
12
  from ._submit_setup import _local_submit_setup
13
13
  from .executor import FractalProcessPoolExecutor
14
+ from fractal_server.images.models import AttributeFiltersType
14
15
 
15
16
 
16
17
  def _process_workflow(
@@ -21,23 +22,18 @@ def _process_workflow(
21
22
  workflow_dir_local: Path,
22
23
  first_task_index: int,
23
24
  last_task_index: int,
24
- ) -> dict:
25
+ job_attribute_filters: AttributeFiltersType,
26
+ ) -> None:
25
27
  """
26
- Internal processing routine
27
-
28
- Schedules the workflow using a `FractalProcessPoolExecutor`.
29
-
30
- Cf.
31
- [process_workflow][fractal_server.app.runner.v2._local_experimental.process_workflow]
32
- for the call signature.
28
+ Run the workflow using a `FractalProcessPoolExecutor`.
33
29
  """
34
30
  with FractalProcessPoolExecutor(
35
31
  shutdown_file=workflow_dir_local / SHUTDOWN_FILENAME
36
32
  ) as executor:
37
33
  try:
38
- new_dataset_attributes = execute_tasks_v2(
34
+ execute_tasks_v2(
39
35
  wf_task_list=workflow.task_list[
40
- first_task_index : (last_task_index + 1) # noqa
36
+ first_task_index : (last_task_index + 1)
41
37
  ],
42
38
  dataset=dataset,
43
39
  executor=executor,
@@ -45,6 +41,7 @@ def _process_workflow(
45
41
  workflow_dir_remote=workflow_dir_local,
46
42
  logger_name=logger_name,
47
43
  submit_setup_call=_local_submit_setup,
44
+ job_attribute_filters=job_attribute_filters,
48
45
  )
49
46
  except BrokenProcessPool as e:
50
47
  raise JobExecutionError(
@@ -54,8 +51,6 @@ def _process_workflow(
54
51
  )
55
52
  )
56
53
 
57
- return new_dataset_attributes
58
-
59
54
 
60
55
  async def process_workflow(
61
56
  *,
@@ -66,12 +61,13 @@ async def process_workflow(
66
61
  first_task_index: Optional[int] = None,
67
62
  last_task_index: Optional[int] = None,
68
63
  logger_name: str,
64
+ job_attribute_filters: AttributeFiltersType,
69
65
  # Slurm-specific
70
66
  user_cache_dir: Optional[str] = None,
71
67
  slurm_user: Optional[str] = None,
72
68
  slurm_account: Optional[str] = None,
73
69
  worker_init: Optional[str] = None,
74
- ) -> dict:
70
+ ) -> None:
75
71
  """
76
72
  Run a workflow
77
73
 
@@ -123,11 +119,6 @@ async def process_workflow(
123
119
  (positive exit codes).
124
120
  JobExecutionError: wrapper for errors raised by the tasks' executors
125
121
  (negative exit codes).
126
-
127
- Returns:
128
- output_dataset_metadata:
129
- The updated metadata for the dataset, as returned by the last task
130
- of the workflow
131
122
  """
132
123
 
133
124
  if workflow_dir_remote and (workflow_dir_remote != workflow_dir_local):
@@ -144,12 +135,12 @@ async def process_workflow(
144
135
  last_task_index=last_task_index,
145
136
  )
146
137
 
147
- new_dataset_attributes = await async_wrap(_process_workflow)(
138
+ await async_wrap(_process_workflow)(
148
139
  workflow=workflow,
149
140
  dataset=dataset,
150
141
  logger_name=logger_name,
151
142
  workflow_dir_local=workflow_dir_local,
152
143
  first_task_index=first_task_index,
153
144
  last_task_index=last_task_index,
145
+ job_attribute_filters=job_attribute_filters,
154
146
  )
155
- return new_dataset_attributes
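In the local-experimental backend, `job_attribute_filters` is now threaded from `process_workflow` into `execute_tasks_v2`, and the dataset-attributes return value is gone. A standalone sketch of the assumed filter semantics (attribute name mapped to a list of allowed values, matched against image attributes); `select_images` is a hypothetical helper for illustration, not fractal-server code:

```python
# Hedged sketch: AttributeFiltersType is assumed to be dict[str, list[Any]].
from typing import Any

AttributeFiltersType = dict[str, list[Any]]


def select_images(
    images: list[dict[str, Any]],
    job_attribute_filters: AttributeFiltersType,
) -> list[dict[str, Any]]:
    """Keep images whose attributes match every job-level filter (assumed)."""
    return [
        img
        for img in images
        if all(
            img.get("attributes", {}).get(key) in allowed
            for key, allowed in job_attribute_filters.items()
        )
    ]


images = [
    {"zarr_url": "/data/plate.zarr/A/01/0", "attributes": {"well": "A01"}},
    {"zarr_url": "/data/plate.zarr/B/03/0", "attributes": {"well": "B03"}},
]
# Only the A01 image passes the per-job filter.
print(select_images(images, {"well": ["A01", "A02"]}))
```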
fractal_server/app/runner/v2/_slurm_ssh/__init__.py

@@ -17,7 +17,6 @@ This backend runs fractal workflows in a SLURM cluster using Clusterfutures
 Executor objects.
 """
 from pathlib import Path
-from typing import Any
 from typing import Optional
 from typing import Union

@@ -30,9 +29,9 @@ from ...executors.slurm.ssh.executor import FractalSlurmSSHExecutor
 from ...set_start_and_last_task_index import set_start_and_last_task_index
 from ..runner import execute_tasks_v2
 from ._submit_setup import _slurm_submit_setup
+from fractal_server.images.models import AttributeFiltersType
 from fractal_server.logger import set_logger

-
 logger = set_logger(__name__)


@@ -47,16 +46,14 @@ def _process_workflow(
     last_task_index: int,
     fractal_ssh: FractalSSH,
     worker_init: Optional[Union[str, list[str]]] = None,
-) -> dict[str, Any]:
+    job_attribute_filters: AttributeFiltersType,
+) -> None:
     """
-    Internal processing routine for the SLURM backend
+    Run the workflow using a `FractalSlurmSSHExecutor`.

     This function initialises the a FractalSlurmExecutor, setting logging,
     workflow working dir and user to impersonate. It then schedules the
     workflow tasks and returns the new dataset attributes
-
-    Returns:
-        new_dataset_attributes:
     """

     if isinstance(worker_init, str):
@@ -80,18 +77,18 @@ def _process_workflow(
         workflow_dir_remote=workflow_dir_remote,
         common_script_lines=worker_init,
     ) as executor:
-        new_dataset_attributes = execute_tasks_v2(
+        execute_tasks_v2(
             wf_task_list=workflow.task_list[
-                first_task_index : (last_task_index + 1)  # noqa
-            ],  # noqa
+                first_task_index : (last_task_index + 1)
+            ],
             dataset=dataset,
             executor=executor,
             workflow_dir_local=workflow_dir_local,
             workflow_dir_remote=workflow_dir_remote,
             logger_name=logger_name,
             submit_setup_call=_slurm_submit_setup,
+            job_attribute_filters=job_attribute_filters,
         )
-    return new_dataset_attributes


 async def process_workflow(
@@ -103,13 +100,14 @@ async def process_workflow(
     first_task_index: Optional[int] = None,
     last_task_index: Optional[int] = None,
     logger_name: str,
-    # Not used
+    job_attribute_filters: AttributeFiltersType,
     fractal_ssh: FractalSSH,
+    worker_init: Optional[str] = None,
+    # Not used
     user_cache_dir: Optional[str] = None,
     slurm_user: Optional[str] = None,
     slurm_account: Optional[str] = None,
-    worker_init: Optional[str] = None,
-) -> dict:
+) -> None:
     """
     Process workflow (SLURM backend public interface)
     """
@@ -122,7 +120,7 @@
         last_task_index=last_task_index,
     )

-    new_dataset_attributes = await async_wrap(_process_workflow)(
+    await async_wrap(_process_workflow)(
         workflow=workflow,
         dataset=dataset,
         logger_name=logger_name,
@@ -132,5 +130,5 @@
         last_task_index=last_task_index,
         worker_init=worker_init,
         fractal_ssh=fractal_ssh,
+        job_attribute_filters=job_attribute_filters,
     )
-    return new_dataset_attributes
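The context lines above show that `worker_init` may arrive either as a single string or as a list of lines (`Optional[Union[str, list[str]]]`), with an `isinstance(worker_init, str)` branch before it reaches the executor as `common_script_lines`. A hedged sketch of that normalization; the helper name `_normalize_worker_init` and the newline-splitting rule are assumptions, not code taken from this diff:

```python
from typing import Optional, Union


def _normalize_worker_init(
    worker_init: Optional[Union[str, list[str]]] = None,
) -> list[str]:
    # Hypothetical helper: turn the user-provided worker_init into a list of
    # SLURM-preamble script lines.
    if worker_init is None:
        return []
    if isinstance(worker_init, str):
        return worker_init.split("\n")
    return worker_init


# Both spellings end up as the same list of script lines.
assert _normalize_worker_init(
    "module load python\nsource venv/bin/activate"
) == ["module load python", "source venv/bin/activate"]
```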
fractal_server/app/runner/v2/_slurm_sudo/__init__.py

@@ -17,7 +17,6 @@ This backend runs fractal workflows in a SLURM cluster using Clusterfutures
 Executor objects.
 """
 from pathlib import Path
-from typing import Any
 from typing import Optional
 from typing import Union

@@ -28,6 +27,7 @@ from ...executors.slurm.sudo.executor import FractalSlurmExecutor
 from ...set_start_and_last_task_index import set_start_and_last_task_index
 from ..runner import execute_tasks_v2
 from ._submit_setup import _slurm_submit_setup
+from fractal_server.images.models import AttributeFiltersType


 def _process_workflow(
@@ -43,16 +43,14 @@ def _process_workflow(
     slurm_account: Optional[str] = None,
     user_cache_dir: str,
     worker_init: Optional[Union[str, list[str]]] = None,
-) -> dict[str, Any]:
+    job_attribute_filters: AttributeFiltersType,
+) -> None:
     """
-    Internal processing routine for the SLURM backend
+    Run the workflow using a `FractalSlurmExecutor`.

     This function initialises the a FractalSlurmExecutor, setting logging,
     workflow working dir and user to impersonate. It then schedules the
     workflow tasks and returns the new dataset attributes
-
-    Returns:
-        new_dataset_attributes:
     """

     if not slurm_user:
@@ -73,18 +71,18 @@ def _process_workflow(
         common_script_lines=worker_init,
         slurm_account=slurm_account,
     ) as executor:
-        new_dataset_attributes = execute_tasks_v2(
+        execute_tasks_v2(
             wf_task_list=workflow.task_list[
-                first_task_index : (last_task_index + 1)  # noqa
-            ],  # noqa
+                first_task_index : (last_task_index + 1)
+            ],
             dataset=dataset,
             executor=executor,
             workflow_dir_local=workflow_dir_local,
             workflow_dir_remote=workflow_dir_remote,
             logger_name=logger_name,
             submit_setup_call=_slurm_submit_setup,
+            job_attribute_filters=job_attribute_filters,
         )
-    return new_dataset_attributes


 async def process_workflow(
@@ -96,12 +94,13 @@ async def process_workflow(
     first_task_index: Optional[int] = None,
     last_task_index: Optional[int] = None,
     logger_name: str,
+    job_attribute_filters: AttributeFiltersType,
     # Slurm-specific
     user_cache_dir: Optional[str] = None,
     slurm_user: Optional[str] = None,
     slurm_account: Optional[str] = None,
     worker_init: Optional[str] = None,
-) -> dict:
+) -> None:
     """
     Process workflow (SLURM backend public interface).
     """
@@ -113,8 +112,7 @@
         first_task_index=first_task_index,
         last_task_index=last_task_index,
     )
-
-    new_dataset_attributes = await async_wrap(_process_workflow)(
+    await async_wrap(_process_workflow)(
         workflow=workflow,
         dataset=dataset,
         logger_name=logger_name,
@@ -126,5 +124,5 @@
         slurm_user=slurm_user,
         slurm_account=slurm_account,
         worker_init=worker_init,
+        job_attribute_filters=job_attribute_filters,
     )
-    return new_dataset_attributes
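With this release, all three v2 backends shown here (`_local_experimental`, `_slurm_ssh`, `_slurm_sudo`) expose the same keyword-only entry point: they take `job_attribute_filters` and return `None` instead of a dataset-attributes dict. The `Protocol` below is purely illustrative (it is not part of fractal-server) and folds backend-specific extras such as the SSH connection or the SLURM user into `**backend_specific`:

```python
from pathlib import Path
from typing import Any, Optional, Protocol


class ProcessWorkflowCallable(Protocol):
    # Illustrative shared surface of the v2 backends' process_workflow.
    async def __call__(
        self,
        *,
        workflow: Any,
        dataset: Any,
        workflow_dir_local: Path,
        workflow_dir_remote: Optional[Path] = None,
        first_task_index: Optional[int] = None,
        last_task_index: Optional[int] = None,
        logger_name: str,
        job_attribute_filters: dict[str, list[Any]],
        **backend_specific: Any,
    ) -> None:
        ...
```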
fractal_server/app/runner/v2/handle_failed_job.py

@@ -12,147 +12,48 @@
 """
 Helper functions to handle Dataset history.
 """
-import json
 import logging
-from pathlib import Path
-from typing import Any
-from typing import Optional
+
+from sqlalchemy.orm.attributes import flag_modified

 from ...models.v2 import DatasetV2
-from ...models.v2 import JobV2
-from ...models.v2 import WorkflowTaskV2
-from ...models.v2 import WorkflowV2
 from ...schemas.v2 import WorkflowTaskStatusTypeV2
-from ..filenames import FILTERS_FILENAME
-from ..filenames import HISTORY_FILENAME
-from ..filenames import IMAGES_FILENAME
+from fractal_server.app.db import get_sync_db


-def assemble_history_failed_job(
-    job: JobV2,
-    dataset: DatasetV2,
-    workflow: WorkflowV2,
-    logger_name: Optional[str] = None,
-    failed_wftask: Optional[WorkflowTaskV2] = None,
-) -> list[dict[str, Any]]:
+def mark_last_wftask_as_failed(
+    dataset_id: int,
+    logger_name: str,
+) -> None:
     """
-    Assemble `history` after a workflow-execution job fails.
+    Edit dataset history, by marking last item as failed.

     Args:
-        job:
-            The failed `JobV2` object.
-        dataset:
-            The `DatasetV2` object associated to `job`.
-        workflow:
-            The `WorkflowV2` object associated to `job`.
+        dataset: The `DatasetV2` object
         logger_name: A logger name.
-        failed_wftask:
-            If set, append it to `history` during step 3; if `None`, infer
-            it by comparing the job task list and the one in
-            `HISTORY_FILENAME`.
-
-    Returns:
-        The new value of `history`, to be merged into
-        `dataset.meta`.
     """

     logger = logging.getLogger(logger_name)
-
-    # The final value of the history attribute should include up to three
-    # parts, coming from: the database, the temporary file, the failed-task
-    # information.
-
-    # Part 1: Read exising history from DB
-    new_history = dataset.history
-
-    # Part 2: Extend history based on temporary-file contents
-    tmp_history_file = Path(job.working_dir) / HISTORY_FILENAME
-    try:
-        with tmp_history_file.open("r") as f:
-            tmp_file_history = json.load(f)
-            new_history.extend(tmp_file_history)
-    except FileNotFoundError:
-        tmp_file_history = []
-
-    # Part 3/A: Identify failed task, if needed
-    if failed_wftask is None:
-        job_wftasks = workflow.task_list[
-            job.first_task_index : (job.last_task_index + 1)  # noqa
-        ]
-        tmp_file_wftasks = [
-            history_item["workflowtask"] for history_item in tmp_file_history
-        ]
-        if len(job_wftasks) <= len(tmp_file_wftasks):
-            n_tasks_job = len(job_wftasks)
-            n_tasks_tmp = len(tmp_file_wftasks)
-            logger.error(
-                "Cannot identify the failed task based on job task list "
-                f"(length {n_tasks_job}) and temporary-file task list "
-                f"(length {n_tasks_tmp})."
+    with next(get_sync_db()) as db:
+        db_dataset = db.get(DatasetV2, dataset_id)
+        if len(db_dataset.history) == 0:
+            logger.warning(
+                f"History for {dataset_id=} is empty. Likely reason: the job "
+                "failed before its first task was marked as SUBMITTED. "
+                "Continue."
            )
-            logger.error("Failed task not appended to history.")
-        else:
-            failed_wftask = job_wftasks[len(tmp_file_wftasks)]
-
-    # Part 3/B: Append failed task to history
-    if failed_wftask is not None:
-        failed_wftask_dump = failed_wftask.model_dump(exclude={"task"})
-        failed_wftask_dump["task"] = failed_wftask.task.model_dump()
-        new_history_item = dict(
-            workflowtask=failed_wftask_dump,
-            status=WorkflowTaskStatusTypeV2.FAILED,
-            parallelization=dict(),  # FIXME: re-include parallelization
-        )
-        new_history.append(new_history_item)
-
-    return new_history
-
-
-def assemble_images_failed_job(job: JobV2) -> Optional[dict[str, Any]]:
-    """
-    Assemble `DatasetV2.images` for a failed workflow-execution.
-
-    Assemble new value of `images` based on the last successful task, i.e.
-    based on the content of the temporary `IMAGES_FILENAME` file. If the file
-    is missing, return `None`.
-
-    Argumentss:
-        job:
-            The failed `JobV2` object.
-
-    Returns:
-        The new value of `dataset.images`, or `None` if `IMAGES_FILENAME`
-        is missing.
-    """
-    tmp_file = Path(job.working_dir) / IMAGES_FILENAME
-    try:
-        with tmp_file.open("r") as f:
-            new_images = json.load(f)
-        return new_images
-    except FileNotFoundError:
-        return None
-
-
-def assemble_filters_failed_job(job: JobV2) -> Optional[dict[str, Any]]:
-    """
-    Assemble `DatasetV2.filters` for a failed workflow-execution.
-
-    Assemble new value of `filters` based on the last successful task, i.e.
-    based on the content of the temporary `FILTERS_FILENAME` file. If the file
-    is missing, return `None`.
-
-    Argumentss:
-        job:
-            The failed `JobV2` object.
-
-    Returns:
-        The new value of `dataset.filters`, or `None` if `FILTERS_FILENAME`
-        is missing.
-    """
-    tmp_file = Path(job.working_dir) / FILTERS_FILENAME
-    try:
-        with tmp_file.open("r") as f:
-            new_filters = json.load(f)
-        return new_filters
-    except FileNotFoundError:
-        return None
+            return
+        workflowtask_id = db_dataset.history[-1]["workflowtask"]["id"]
+        last_item_status = db_dataset.history[-1]["status"]
+        if last_item_status != WorkflowTaskStatusTypeV2.SUBMITTED:
+            logger.warning(
+                "Unexpected branch: "
+                f"Last history item, for {workflowtask_id=}, "
+                f"has status {last_item_status}. Skip."
+            )
+            return
+        logger.info(f"Setting history item for {workflowtask_id=} to failed.")
+        db_dataset.history[-1]["status"] = WorkflowTaskStatusTypeV2.FAILED
+        flag_modified(db_dataset, "history")
+        db.merge(db_dataset)
+        db.commit()
1
- from copy import copy
2
-
3
1
  from fractal_server.app.runner.v2.deduplicate_list import deduplicate_list
4
2
  from fractal_server.app.runner.v2.task_interface import TaskOutput
5
3
 
6
4
 
7
5
  def merge_outputs(task_outputs: list[TaskOutput]) -> TaskOutput:
8
6
 
7
+ if len(task_outputs) == 0:
8
+ return TaskOutput()
9
+
9
10
  final_image_list_updates = []
10
11
  final_image_list_removals = []
11
- last_new_filters = None
12
12
 
13
- for ind, task_output in enumerate(task_outputs):
13
+ for task_output in task_outputs:
14
14
 
15
15
  final_image_list_updates.extend(task_output.image_list_updates)
16
16
  final_image_list_removals.extend(task_output.image_list_removals)
17
17
 
18
- # Check that all filters are the same
19
- current_new_filters = task_output.filters
20
- if ind == 0:
21
- last_new_filters = copy(current_new_filters)
22
- if current_new_filters != last_new_filters:
23
- raise ValueError(f"{current_new_filters=} but {last_new_filters=}")
24
- last_new_filters = copy(current_new_filters)
18
+ # Check that all type_filters are the same
19
+ if task_output.type_filters != task_outputs[0].type_filters:
20
+ raise ValueError(
21
+ f"{task_output.type_filters=} "
22
+ f"but {task_outputs[0].type_filters=}"
23
+ )
25
24
 
25
+ # Note: the ordering of `image_list_removals` is not guaranteed
26
26
  final_image_list_updates = deduplicate_list(final_image_list_updates)
27
-
28
- additional_args = {}
29
- if last_new_filters is not None:
30
- additional_args["filters"] = last_new_filters
27
+ final_image_list_removals = list(set(final_image_list_removals))
31
28
 
32
29
  final_output = TaskOutput(
33
30
  image_list_updates=final_image_list_updates,
34
31
  image_list_removals=final_image_list_removals,
35
- **additional_args,
32
+ type_filters=task_outputs[0].type_filters,
36
33
  )
37
34
 
38
35
  return final_output
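The new early-return for an empty output list and the `type_filters` consistency check can be exercised directly. A hedged usage sketch: the import paths and constructor keywords come from the diff above, while the zarr URLs and the assumption that image updates can be passed as plain dicts are illustrative:

```python
from fractal_server.app.runner.v2.merge_outputs import merge_outputs
from fractal_server.app.runner.v2.task_interface import TaskOutput

merged = merge_outputs(
    [
        TaskOutput(
            image_list_updates=[{"zarr_url": "/data/plate.zarr/A/01/0"}],
            type_filters={"3D": False},
        ),
        TaskOutput(
            image_list_removals=["/data/plate.zarr/A/02/0"],
            type_filters={"3D": False},
        ),
    ]
)

# All per-task type_filters must agree, otherwise merge_outputs raises ValueError.
assert merged.type_filters == {"3D": False}

# With no task outputs at all, 2.11 returns an empty TaskOutput instead of failing.
assert merge_outputs([]) == TaskOutput()
```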