fractal-server 2.8.1__py3-none-any.whl → 2.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/db/__init__.py +2 -35
- fractal_server/app/models/v2/__init__.py +3 -3
- fractal_server/app/models/v2/task.py +0 -72
- fractal_server/app/models/v2/task_group.py +113 -0
- fractal_server/app/routes/admin/v1.py +13 -30
- fractal_server/app/routes/admin/v2/__init__.py +4 -0
- fractal_server/app/routes/admin/v2/job.py +13 -24
- fractal_server/app/routes/admin/v2/task.py +13 -0
- fractal_server/app/routes/admin/v2/task_group.py +75 -14
- fractal_server/app/routes/admin/v2/task_group_lifecycle.py +267 -0
- fractal_server/app/routes/api/v1/project.py +7 -19
- fractal_server/app/routes/api/v2/__init__.py +11 -2
- fractal_server/app/routes/api/v2/{_aux_functions_task_collection.py → _aux_functions_task_lifecycle.py} +83 -0
- fractal_server/app/routes/api/v2/_aux_functions_tasks.py +27 -17
- fractal_server/app/routes/api/v2/submit.py +19 -24
- fractal_server/app/routes/api/v2/task_collection.py +33 -65
- fractal_server/app/routes/api/v2/task_collection_custom.py +3 -3
- fractal_server/app/routes/api/v2/task_group.py +86 -14
- fractal_server/app/routes/api/v2/task_group_lifecycle.py +272 -0
- fractal_server/app/routes/api/v2/workflow.py +1 -1
- fractal_server/app/routes/api/v2/workflow_import.py +2 -2
- fractal_server/app/routes/auth/current_user.py +60 -17
- fractal_server/app/routes/auth/group.py +67 -39
- fractal_server/app/routes/auth/users.py +97 -99
- fractal_server/app/routes/aux/__init__.py +20 -0
- fractal_server/app/runner/executors/slurm/_slurm_config.py +0 -17
- fractal_server/app/runner/executors/slurm/ssh/executor.py +49 -204
- fractal_server/app/runner/executors/slurm/sudo/executor.py +26 -109
- fractal_server/app/runner/executors/slurm/utils_executors.py +58 -0
- fractal_server/app/runner/v2/_local_experimental/executor.py +2 -1
- fractal_server/app/schemas/_validators.py +0 -15
- fractal_server/app/schemas/user.py +16 -10
- fractal_server/app/schemas/user_group.py +0 -11
- fractal_server/app/schemas/v1/applyworkflow.py +0 -8
- fractal_server/app/schemas/v1/dataset.py +0 -5
- fractal_server/app/schemas/v1/project.py +0 -5
- fractal_server/app/schemas/v1/state.py +0 -5
- fractal_server/app/schemas/v1/workflow.py +0 -5
- fractal_server/app/schemas/v2/__init__.py +4 -2
- fractal_server/app/schemas/v2/dataset.py +0 -6
- fractal_server/app/schemas/v2/job.py +0 -8
- fractal_server/app/schemas/v2/project.py +0 -5
- fractal_server/app/schemas/v2/task_collection.py +0 -21
- fractal_server/app/schemas/v2/task_group.py +59 -8
- fractal_server/app/schemas/v2/workflow.py +0 -5
- fractal_server/app/security/__init__.py +17 -0
- fractal_server/config.py +61 -59
- fractal_server/migrations/versions/d256a7379ab8_taskgroup_activity_and_venv_info_to_.py +117 -0
- fractal_server/ssh/_fabric.py +156 -83
- fractal_server/tasks/utils.py +2 -12
- fractal_server/tasks/v2/local/__init__.py +3 -0
- fractal_server/tasks/v2/local/_utils.py +70 -0
- fractal_server/tasks/v2/local/collect.py +291 -0
- fractal_server/tasks/v2/local/deactivate.py +218 -0
- fractal_server/tasks/v2/local/reactivate.py +159 -0
- fractal_server/tasks/v2/ssh/__init__.py +3 -0
- fractal_server/tasks/v2/ssh/_utils.py +87 -0
- fractal_server/tasks/v2/ssh/collect.py +311 -0
- fractal_server/tasks/v2/ssh/deactivate.py +253 -0
- fractal_server/tasks/v2/ssh/reactivate.py +202 -0
- fractal_server/tasks/v2/templates/{_2_preliminary_pip_operations.sh → 1_create_venv.sh} +6 -7
- fractal_server/tasks/v2/templates/{_3_pip_install.sh → 2_pip_install.sh} +8 -1
- fractal_server/tasks/v2/templates/{_4_pip_freeze.sh → 3_pip_freeze.sh} +0 -7
- fractal_server/tasks/v2/templates/{_5_pip_show.sh → 4_pip_show.sh} +5 -6
- fractal_server/tasks/v2/templates/5_get_venv_size_and_file_number.sh +10 -0
- fractal_server/tasks/v2/templates/6_pip_install_from_freeze.sh +35 -0
- fractal_server/tasks/v2/utils_background.py +42 -127
- fractal_server/tasks/v2/utils_templates.py +32 -2
- fractal_server/utils.py +4 -2
- fractal_server/zip_tools.py +21 -4
- {fractal_server-2.8.1.dist-info → fractal_server-2.9.0.dist-info}/METADATA +3 -5
- {fractal_server-2.8.1.dist-info → fractal_server-2.9.0.dist-info}/RECORD +77 -64
- fractal_server/app/models/v2/collection_state.py +0 -22
- fractal_server/tasks/v2/collection_local.py +0 -357
- fractal_server/tasks/v2/collection_ssh.py +0 -352
- fractal_server/tasks/v2/templates/_1_create_venv.sh +0 -42
- /fractal_server/tasks/v2/{database_operations.py → utils_database.py} +0 -0
- {fractal_server-2.8.1.dist-info → fractal_server-2.9.0.dist-info}/LICENSE +0 -0
- {fractal_server-2.8.1.dist-info → fractal_server-2.9.0.dist-info}/WHEEL +0 -0
- {fractal_server-2.8.1.dist-info → fractal_server-2.9.0.dist-info}/entry_points.txt +0 -0
--- a/fractal_server/app/routes/auth/users.py
+++ b/fractal_server/app/routes/auth/users.py
@@ -8,9 +8,7 @@ from fastapi import status
 from fastapi_users import exceptions
 from fastapi_users import schemas
 from fastapi_users.router.common import ErrorCode
-from sqlalchemy.exc import IntegrityError
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlmodel import col
 from sqlmodel import func
 from sqlmodel import select

@@ -18,9 +16,10 @@ from . import current_active_superuser
 from ...db import get_async_db
 from ...schemas.user import UserRead
 from ...schemas.user import UserUpdate
-from ...schemas.user import UserUpdateWithNewGroupIds
 from ..aux.validate_user_settings import verify_user_has_settings
+from ._aux_auth import _get_default_usergroup_id
 from ._aux_auth import _get_single_user_with_groups
+from ._aux_auth import FRACTAL_DEFAULT_GROUP_NAME
 from fractal_server.app.models import LinkUserGroup
 from fractal_server.app.models import UserGroup
 from fractal_server.app.models import UserOAuth
@@ -28,6 +27,7 @@ from fractal_server.app.models import UserSettings
 from fractal_server.app.routes.auth._aux_auth import _user_or_404
 from fractal_server.app.schemas import UserSettingsRead
 from fractal_server.app.schemas import UserSettingsUpdate
+from fractal_server.app.schemas.user import UserUpdateGroups
 from fractal_server.app.security import get_user_manager
 from fractal_server.app.security import UserManager
 from fractal_server.logger import set_logger
@@ -55,114 +55,43 @@ async def get_user(
 @router_users.patch("/users/{user_id}/", response_model=UserRead)
 async def patch_user(
     user_id: int,
-    user_update: UserUpdateWithNewGroupIds,
+    user_update: UserUpdate,
     current_superuser: UserOAuth = Depends(current_active_superuser),
     user_manager: UserManager = Depends(get_user_manager),
     db: AsyncSession = Depends(get_async_db),
 ):
     """
     Custom version of the PATCH-user route from `fastapi-users`.
-
-    In order to keep the fastapi-users logic in place (which is convenient to
-    update user attributes), we split the endpoint into two branches. We either
-    go through the fastapi-users-based attribute-update branch, or through the
-    branch where we establish new user/group relationships.
-
-    Note that we prevent making both changes at the same time, since it would
-    be more complex to guarantee that endpoint error would leave the database
-    in the same state as before the API call.
     """

-    # We prevent simultaneous editing of both user attributes and user/group
-    # associations
-    user_update_dict_without_groups = user_update.dict(
-        exclude_unset=True, exclude={"new_group_ids"}
-    )
-    edit_attributes = user_update_dict_without_groups != {}
-    edit_groups = user_update.new_group_ids is not None
-    if edit_attributes and edit_groups:
-        raise HTTPException(
-            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-            detail=(
-                "Cannot modify both user attributes and group membership. "
-                "Please make two independent PATCH calls"
-            ),
-        )
-
     # Check that user exists
     user_to_patch = await _user_or_404(user_id, db)

-
-
-
-
-
-
-
-
+    # Modify user attributes
+    try:
+        user = await user_manager.update(
+            user_update,
+            user_to_patch,
+            safe=False,
+            request=None,
+        )
+        validated_user = schemas.model_validate(UserOAuth, user)
+        patched_user = await db.get(
+            UserOAuth, validated_user.id, populate_existing=True
+        )
+    except exceptions.InvalidPasswordException as e:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail={
+                "code": ErrorCode.UPDATE_USER_INVALID_PASSWORD,
+                "reason": e.reason,
+            },
+        )
+    except exceptions.UserAlreadyExists:
+        raise HTTPException(
+            status.HTTP_400_BAD_REQUEST,
+            detail=ErrorCode.UPDATE_USER_EMAIL_ALREADY_EXISTS,
         )
-        res = await db.execute(stm)
-        number_matching_groups = res.scalar()
-        if number_matching_groups != len(user_update.new_group_ids):
-            raise HTTPException(
-                status_code=status.HTTP_404_NOT_FOUND,
-                detail=(
-                    "Not all requested groups (IDs: "
-                    f"{user_update.new_group_ids}) exist."
-                ),
-            )
-
-        for new_group_id in user_update.new_group_ids:
-            link = LinkUserGroup(user_id=user_id, group_id=new_group_id)
-            db.add(link)
-
-        try:
-            await db.commit()
-        except IntegrityError as e:
-            error_msg = (
-                f"Cannot link groups with IDs {user_update.new_group_ids} "
-                f"to user {user_id}. "
-                "Likely reason: one of these links already exists.\n"
-                f"Original error: {str(e)}"
-            )
-            logger.info(error_msg)
-            raise HTTPException(
-                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-                detail=error_msg,
-            )
-        patched_user = user_to_patch
-    elif edit_attributes:
-        # Modify user attributes
-        try:
-            user_update_without_groups = UserUpdate(
-                **user_update_dict_without_groups
-            )
-            user = await user_manager.update(
-                user_update_without_groups,
-                user_to_patch,
-                safe=False,
-                request=None,
-            )
-            validated_user = schemas.model_validate(UserOAuth, user)
-            patched_user = await db.get(
-                UserOAuth, validated_user.id, populate_existing=True
-            )
-        except exceptions.InvalidPasswordException as e:
-            raise HTTPException(
-                status_code=status.HTTP_400_BAD_REQUEST,
-                detail={
-                    "code": ErrorCode.UPDATE_USER_INVALID_PASSWORD,
-                    "reason": e.reason,
-                },
-            )
-        except exceptions.UserAlreadyExists:
-            raise HTTPException(
-                status.HTTP_400_BAD_REQUEST,
-                detail=ErrorCode.UPDATE_USER_EMAIL_ALREADY_EXISTS,
-            )
-    else:
-        # Nothing to do, just continue
-        patched_user = user_to_patch

     # Enrich user object with `group_ids_names` attribute
     patched_user_with_groups = await _get_single_user_with_groups(
@@ -203,6 +132,75 @@ async def list_users(
     return user_list


+@router_users.post("/users/{user_id}/set-groups/", response_model=UserRead)
+async def set_user_groups(
+    user_id: int,
+    user_update: UserUpdateGroups,
+    superuser: UserOAuth = Depends(current_active_superuser),
+    db: AsyncSession = Depends(get_async_db),
+) -> UserRead:
+
+    # Preliminary check that all objects exist in the db
+    user = await _user_or_404(user_id=user_id, db=db)
+    target_group_ids = user_update.group_ids
+    stm = select(func.count(UserGroup.id)).where(
+        UserGroup.id.in_(target_group_ids)
+    )
+    res = await db.execute(stm)
+    count = res.scalar()
+    if count != len(target_group_ids):
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"Some UserGroups in {target_group_ids} do not exist.",
+        )
+
+    # Check that default group is not being removed
+    default_group_id = await _get_default_usergroup_id(db=db)
+    if default_group_id not in target_group_ids:
+        raise HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+            detail=(
+                f"Cannot remove user from "
+                f"'{FRACTAL_DEFAULT_GROUP_NAME}' group.",
+            ),
+        )
+
+    # Prepare lists of links to be removed
+    res = await db.execute(
+        select(LinkUserGroup)
+        .where(LinkUserGroup.user_id == user_id)
+        .where(LinkUserGroup.group_id.not_in(target_group_ids))
+    )
+    links_to_remove = res.scalars().all()
+
+    # Prepare lists of links to be added
+    res = await db.execute(
+        select(LinkUserGroup.group_id)
+        .where(LinkUserGroup.user_id == user_id)
+        .where(LinkUserGroup.group_id.in_(target_group_ids))
+    )
+    ids_links_already_in = res.scalars().all()
+    ids_links_to_add = set(target_group_ids) - set(ids_links_already_in)
+
+    # Remove/create links as needed
+    for link in links_to_remove:
+        logger.info(
+            f"Removing LinkUserGroup with {link.user_id=} "
+            f"and {link.group_id=}."
+        )
+        await db.delete(link)
+    for group_id in ids_links_to_add:
+        logger.info(
+            f"Creating new LinkUserGroup with {user_id=} " f"and {group_id=}."
+        )
+        db.add(LinkUserGroup(user_id=user_id, group_id=group_id))
+    await db.commit()
+
+    user_with_groups = await _get_single_user_with_groups(user, db)
+
+    return user_with_groups
+
+
 @router_users.get(
     "/users/{user_id}/settings/", response_model=UserSettingsRead
 )
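Taken together, the `patch_user` rewrite and the new `set_user_groups` route above split user administration into two single-purpose endpoints: `PATCH /users/{user_id}/` now takes a plain `UserUpdate` (attributes only), while group membership is replaced atomically via `POST /users/{user_id}/set-groups/`. A minimal client-side sketch follows; the base URL, IDs, and token are hypothetical, and the `/auth` prefix is assumed from how fractal-server mounts its auth routers:

    import httpx

    BASE = "http://localhost:8000/auth"  # hypothetical deployment URL
    HEADERS = {"Authorization": "Bearer <superuser-token>"}  # placeholder token

    # 1) Update user attributes only (UserUpdate schema)
    httpx.patch(f"{BASE}/users/42/", json={"email": "new@example.org"}, headers=HEADERS)

    # 2) Replace the full list of group memberships (UserUpdateGroups schema).
    #    The list must still contain the default group, or the route returns 422.
    httpx.post(f"{BASE}/users/42/set-groups/", json={"group_ids": [1, 3]}, headers=HEADERS)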
--- /dev/null
+++ b/fractal_server/app/routes/aux/__init__.py
@@ -0,0 +1,20 @@
+from datetime import datetime
+from typing import Optional
+
+from fastapi import HTTPException
+from fastapi import status
+
+
+def _raise_if_naive_datetime(*timestamps: tuple[Optional[datetime]]) -> None:
+    """
+    Raise 422 if any not-null argument is a naive `datetime` object:
+    https://docs.python.org/3/library/datetime.html#determining-if-an-object-is-aware-or-naive
+    """
+    for timestamp in filter(None, timestamps):
+        if (timestamp.tzinfo is None) or (
+            timestamp.tzinfo.utcoffset(timestamp) is None
+        ):
+            raise HTTPException(
+                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+                detail=f"{timestamp=} is naive. You must provide a timezone.",
+            )
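This new module backs the routes that accept timestamp filters (several of the admin/API route files listed above import it). A standalone illustration of the aware-vs-naive distinction it enforces, assuming the helper is imported from `fractal_server.app.routes.aux`:

    from datetime import datetime, timezone

    from fractal_server.app.routes.aux import _raise_if_naive_datetime

    aware = datetime(2024, 1, 1, tzinfo=timezone.utc)
    naive = datetime(2024, 1, 1)

    _raise_if_naive_datetime(aware, None)  # OK: None entries are skipped by filter(None, ...)
    _raise_if_naive_datetime(naive)        # raises HTTPException with status 422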
--- a/fractal_server/app/runner/executors/slurm/_slurm_config.py
+++ b/fractal_server/app/runner/executors/slurm/_slurm_config.py
@@ -456,20 +456,3 @@ def _parse_mem_value(raw_mem: Union[str, int]) -> int:

     logger.debug(f"{info}, return {mem_MB}")
     return mem_MB
-
-
-def get_default_slurm_config():
-    """
-    Return a default `SlurmConfig` configuration object
-    """
-    return SlurmConfig(
-        partition="main",
-        cpus_per_task=1,
-        mem_per_task_MB=100,
-        target_cpus_per_job=1,
-        max_cpus_per_job=2,
-        target_mem_per_job=100,
-        max_mem_per_job=500,
-        target_num_jobs=2,
-        max_num_jobs=4,
-    )
--- a/fractal_server/app/runner/executors/slurm/ssh/executor.py
+++ b/fractal_server/app/runner/executors/slurm/ssh/executor.py
@@ -26,15 +26,16 @@ from typing import Sequence

 import cloudpickle
 from cfut import SlurmExecutor
-from paramiko.ssh_exception import NoValidConnectionsError

 from ....filenames import SHUTDOWN_FILENAME
 from ....task_files import get_task_file_paths
 from ....task_files import TaskFiles
 from ....versions import get_versions
-from ...slurm._slurm_config import get_default_slurm_config
 from ...slurm._slurm_config import SlurmConfig
 from .._batching import heuristics
+from ..utils_executors import get_pickle_file_path
+from ..utils_executors import get_slurm_file_path
+from ..utils_executors import get_slurm_script_file_path
 from ._executor_wait_thread import FractalSlurmWaitThread
 from fractal_server.app.runner.components import _COMPONENT_KEY_
 from fractal_server.app.runner.compress_folder import compress_folder
@@ -224,132 +225,12 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
         with self.jobs_lock:
             self.map_jobid_to_slurm_files_local.pop(jobid)

-    def get_input_pickle_file_path_local(
-        self, *, arg: str, subfolder_name: str, prefix: Optional[str] = None
-    ) -> Path:
-
-        prefix = prefix or "cfut"
-        output = (
-            self.workflow_dir_local
-            / subfolder_name
-            / f"{prefix}_in_{arg}.pickle"
-        )
-        return output
-
-    def get_input_pickle_file_path_remote(
-        self, *, arg: str, subfolder_name: str, prefix: Optional[str] = None
-    ) -> Path:
-
-        prefix = prefix or "cfut"
-        output = (
-            self.workflow_dir_remote
-            / subfolder_name
-            / f"{prefix}_in_{arg}.pickle"
-        )
-        return output
-
-    def get_output_pickle_file_path_local(
-        self, *, arg: str, subfolder_name: str, prefix: Optional[str] = None
-    ) -> Path:
-        prefix = prefix or "cfut"
-        return (
-            self.workflow_dir_local
-            / subfolder_name
-            / f"{prefix}_out_{arg}.pickle"
-        )
-
-    def get_output_pickle_file_path_remote(
-        self, *, arg: str, subfolder_name: str, prefix: Optional[str] = None
-    ) -> Path:
-        prefix = prefix or "cfut"
-        return (
-            self.workflow_dir_remote
-            / subfolder_name
-            / f"{prefix}_out_{arg}.pickle"
-        )
-
-    def get_slurm_script_file_path_local(
-        self, *, subfolder_name: str, prefix: Optional[str] = None
-    ) -> Path:
-        prefix = prefix or "_temp"
-        return (
-            self.workflow_dir_local
-            / subfolder_name
-            / f"{prefix}_slurm_submit.sbatch"
-        )
-
-    def get_slurm_script_file_path_remote(
-        self, *, subfolder_name: str, prefix: Optional[str] = None
-    ) -> Path:
-        prefix = prefix or "_temp"
-        return (
-            self.workflow_dir_remote
-            / subfolder_name
-            / f"{prefix}_slurm_submit.sbatch"
-        )
-
-    def get_slurm_stdout_file_path_local(
-        self,
-        *,
-        subfolder_name: str,
-        arg: str = "%j",
-        prefix: Optional[str] = None,
-    ) -> Path:
-        prefix = prefix or "slurmpy.stdout"
-        return (
-            self.workflow_dir_local
-            / subfolder_name
-            / f"{prefix}_slurm_{arg}.out"
-        )
-
-    def get_slurm_stdout_file_path_remote(
-        self,
-        *,
-        subfolder_name: str,
-        arg: str = "%j",
-        prefix: Optional[str] = None,
-    ) -> Path:
-        prefix = prefix or "slurmpy.stdout"
-        return (
-            self.workflow_dir_remote
-            / subfolder_name
-            / f"{prefix}_slurm_{arg}.out"
-        )
-
-    def get_slurm_stderr_file_path_local(
-        self,
-        *,
-        subfolder_name: str,
-        arg: str = "%j",
-        prefix: Optional[str] = None,
-    ) -> Path:
-        prefix = prefix or "slurmpy.stderr"
-        return (
-            self.workflow_dir_local
-            / subfolder_name
-            / f"{prefix}_slurm_{arg}.err"
-        )
-
-    def get_slurm_stderr_file_path_remote(
-        self,
-        *,
-        subfolder_name: str,
-        arg: str = "%j",
-        prefix: Optional[str] = None,
-    ) -> Path:
-        prefix = prefix or "slurmpy.stderr"
-        return (
-            self.workflow_dir_remote
-            / subfolder_name
-            / f"{prefix}_slurm_{arg}.err"
-        )
-
     def submit(
         self,
         fun: Callable[..., Any],
         *fun_args: Sequence[Any],
-        slurm_config: Optional[SlurmConfig] = None,
-        task_files: Optional[TaskFiles] = None,
+        slurm_config: SlurmConfig,
+        task_files: TaskFiles,
         **fun_kwargs: dict,
     ) -> Future:
         """
@@ -360,11 +241,9 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
             fun_args: Function positional arguments
             fun_kwargs: Function keyword arguments
             slurm_config:
-                A `SlurmConfig` object; if `None`, use
-                `get_default_slurm_config()`.
+                A `SlurmConfig` object.
             task_files:
-                A `TaskFiles` object; if `None`, use
-                `self.get_default_task_files()`.
+                A `TaskFiles` object.

         Returns:
             Future representing the execution of the current SLURM job.
@@ -376,12 +255,6 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
             logger.warning(error_msg)
             raise JobExecutionError(info=error_msg)

-        # Set defaults, if needed
-        if slurm_config is None:
-            slurm_config = get_default_slurm_config()
-        if task_files is None:
-            task_files = self.get_default_task_files()
-
         # Set slurm_file_prefix
         slurm_file_prefix = task_files.file_prefix

@@ -409,15 +282,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
             args=fun_args,
             kwargs=fun_kwargs,
         )
-        try:
-            self._put_subfolder_sftp(jobs=[job])
-        except NoValidConnectionsError as e:
-            logger.error("NoValidConnectionError")
-            logger.error(f"{str(e)=}")
-            logger.error(f"{e.errors=}")
-            for err in e.errors:
-                logger.error(f"{str(err)}")
-            raise e
+        self._put_subfolder_sftp(jobs=[job])
         future, job_id_str = self._submit_job(job)
         self.wait_thread.wait(job_id=job_id_str)
         return future
@@ -427,8 +292,8 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
         fn: Callable[..., Any],
         iterable: list[Sequence[Any]],
         *,
-        slurm_config: Optional[SlurmConfig] = None,
-        task_files: Optional[TaskFiles] = None,
+        slurm_config: SlurmConfig,
+        task_files: TaskFiles,
     ):
         """
         Return an iterator with the results of several execution of a function
@@ -451,12 +316,9 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
                 An iterable such that each element is the list of arguments to
                 be passed to `fn`, as in `fn(*args)`.
             slurm_config:
-                A `SlurmConfig` object; if `None`, use
-                `get_default_slurm_config()`.
+                A `SlurmConfig` object.
             task_files:
-                A `TaskFiles` object; if `None`, use
-                `self.get_default_task_files()`.
-
+                A `TaskFiles` object.
         """

         # Do not continue if auxiliary thread was shut down
@@ -481,12 +343,6 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
         # self._exception
         del fut

-        # Set defaults, if needed
-        if not slurm_config:
-            slurm_config = get_default_slurm_config()
-        if task_files is None:
-            task_files = self.get_default_task_files()
-
         # Include common_script_lines in extra_lines
         logger.debug(
             f"Adding {self.common_script_lines=} to "
@@ -559,16 +415,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
             current_component_index += batch_size
         logger.debug("[map] Job preparation - END")

-        try:
-            self._put_subfolder_sftp(jobs=jobs_to_submit)
-        except NoValidConnectionsError as e:
-            logger.error("NoValidConnectionError")
-            logger.error(f"{str(e)=}")
-            logger.error(f"{e.errors=}")
-            for err in e.errors:
-                logger.error(f"{str(err)}")
-
-            raise e
+        self._put_subfolder_sftp(jobs=jobs_to_submit)

         # Construct list of futures (one per SLURM job, i.e. one per batch)
         # FIXME SSH: we may create a single `_submit_many_jobs` method to
@@ -728,63 +575,80 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
                 f"Missing folder {subfolder_path.as_posix()}."
             )

-        # Define I/O pickle file local/remote paths
         job.input_pickle_files_local = tuple(
-            self.get_input_pickle_file_path_local(
+            get_pickle_file_path(
                 arg=job.workerids[ind],
+                workflow_dir=self.workflow_dir_local,
                 subfolder_name=job.wftask_subfolder_name,
+                in_or_out="in",
                 prefix=job.wftask_file_prefixes[ind],
             )
             for ind in range(job.num_tasks_tot)
         )
+
         job.input_pickle_files_remote = tuple(
-            self.get_input_pickle_file_path_remote(
+            get_pickle_file_path(
                 arg=job.workerids[ind],
+                workflow_dir=self.workflow_dir_remote,
                 subfolder_name=job.wftask_subfolder_name,
+                in_or_out="in",
                 prefix=job.wftask_file_prefixes[ind],
             )
             for ind in range(job.num_tasks_tot)
         )
         job.output_pickle_files_local = tuple(
-            self.get_output_pickle_file_path_local(
+            get_pickle_file_path(
                 arg=job.workerids[ind],
+                workflow_dir=self.workflow_dir_local,
                 subfolder_name=job.wftask_subfolder_name,
+                in_or_out="out",
                 prefix=job.wftask_file_prefixes[ind],
             )
             for ind in range(job.num_tasks_tot)
         )
         job.output_pickle_files_remote = tuple(
-            self.get_output_pickle_file_path_remote(
+            get_pickle_file_path(
                 arg=job.workerids[ind],
+                workflow_dir=self.workflow_dir_remote,
                 subfolder_name=job.wftask_subfolder_name,
+                in_or_out="out",
                 prefix=job.wftask_file_prefixes[ind],
             )
             for ind in range(job.num_tasks_tot)
         )
-
-
-        job.slurm_script_local = self.get_slurm_script_file_path_local(
+        # define slurm-job file local/remote paths
+        job.slurm_script_local = get_slurm_script_file_path(
+            workflow_dir=self.workflow_dir_local,
             subfolder_name=job.wftask_subfolder_name,
             prefix=job.slurm_file_prefix,
         )
-        job.slurm_script_remote = self.get_slurm_script_file_path_remote(
+        job.slurm_script_remote = get_slurm_script_file_path(
+            workflow_dir=self.workflow_dir_remote,
             subfolder_name=job.wftask_subfolder_name,
             prefix=job.slurm_file_prefix,
         )
-        job.slurm_stdout_local = self.get_slurm_stdout_file_path_local(
+        job.slurm_stdout_local = get_slurm_file_path(
+            workflow_dir=self.workflow_dir_local,
             subfolder_name=job.wftask_subfolder_name,
+            out_or_err="out",
             prefix=job.slurm_file_prefix,
         )
-        job.slurm_stdout_remote = self.get_slurm_stdout_file_path_remote(
+        job.slurm_stdout_remote = get_slurm_file_path(
+            workflow_dir=self.workflow_dir_remote,
             subfolder_name=job.wftask_subfolder_name,
+            out_or_err="out",
             prefix=job.slurm_file_prefix,
         )
-        job.slurm_stderr_local = self.get_slurm_stderr_file_path_local(
+        job.slurm_stderr_local = get_slurm_file_path(
+            workflow_dir=self.workflow_dir_local,
             subfolder_name=job.wftask_subfolder_name,
+            out_or_err="err",
             prefix=job.slurm_file_prefix,
         )
-        job.slurm_stderr_remote = self.get_slurm_stderr_file_path_remote(
+        job.slurm_stderr_remote = get_slurm_file_path(
+            workflow_dir=self.workflow_dir_remote,
             subfolder_name=job.wftask_subfolder_name,
+            out_or_err="err",
             prefix=job.slurm_file_prefix,
         )

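The ten `get_*_file_path` methods deleted in the earlier hunk are replaced by three module-level helpers imported from `fractal_server/app/runner/executors/slurm/utils_executors.py` (+58 lines, whose body is not shown in this diff). Combining the deleted path templates with the new call sites suggests a sketch along these lines (signatures inferred, not copied from the source):

    from pathlib import Path
    from typing import Literal


    def get_pickle_file_path(
        *,
        arg: str,
        workflow_dir: Path,
        subfolder_name: str,
        in_or_out: Literal["in", "out"],
        prefix: str,
    ) -> Path:
        # Old templates: {prefix}_in_{arg}.pickle / {prefix}_out_{arg}.pickle
        if in_or_out not in ("in", "out"):
            raise ValueError(f"Invalid {in_or_out=}")
        return workflow_dir / subfolder_name / f"{prefix}_{in_or_out}_{arg}.pickle"


    def get_slurm_script_file_path(
        *, workflow_dir: Path, subfolder_name: str, prefix: str
    ) -> Path:
        # Old template: {prefix}_slurm_submit.sbatch
        return workflow_dir / subfolder_name / f"{prefix}_slurm_submit.sbatch"


    def get_slurm_file_path(
        *,
        workflow_dir: Path,
        subfolder_name: str,
        out_or_err: Literal["out", "err"],
        prefix: str,
        arg: str = "%j",
    ) -> Path:
        # Old templates: {prefix}_slurm_{arg}.out and {prefix}_slurm_{arg}.err
        if out_or_err not in ("out", "err"):
            raise ValueError(f"Invalid {out_or_err=}")
        return workflow_dir / subfolder_name / f"{prefix}_slurm_{arg}.{out_or_err}"

Parameterizing over `workflow_dir`, `in_or_out`, and `out_or_err` removes the local-vs-remote and stdout-vs-stderr duplication that previously required ten near-identical methods.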
@@ -1073,16 +937,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
                 self.jobs_empty_cond.notify_all()

         # Fetch subfolder from remote host
-        try:
-            self._get_subfolder_sftp(jobs=jobs)
-        except NoValidConnectionsError as e:
-            logger.error("NoValidConnectionError")
-            logger.error(f"{str(e)=}")
-            logger.error(f"{e.errors=}")
-            for err in e.errors:
-                logger.error(f"{str(err)}")
-
-            raise e
+        self._get_subfolder_sftp(jobs=jobs)

         # First round of checking whether all output files exist
         missing_out_paths = []
@@ -1321,7 +1176,6 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
         slurm_err_path: str,
         slurm_config: SlurmConfig,
     ):
-
         num_tasks_max_running = slurm_config.parallel_tasks_per_job
         mem_per_task_MB = slurm_config.mem_per_task_MB

@@ -1373,19 +1227,6 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
         script = "\n".join(script_lines)
         return script

-    def get_default_task_files(self) -> TaskFiles:
-        """
-        This will be called when self.submit or self.map are called from
-        outside fractal-server, and then lack some optional arguments.
-        """
-        task_files = TaskFiles(
-            workflow_dir_local=self.workflow_dir_local,
-            workflow_dir_remote=self.workflow_dir_remote,
-            task_order=None,
-            task_name="name",
-        )
-        return task_files
-
     def shutdown(self, wait=True, *, cancel_futures=False):
         """
         Clean up all executor variables. Note that this function is executed on
def shutdown(self, wait=True, *, cancel_futures=False):
|
1390
1231
|
"""
|
1391
1232
|
Clean up all executor variables. Note that this function is executed on
|
@@ -1527,7 +1368,11 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
1527
1368
|
logger.info("[FractalSlurmSSHExecutor.ssh_handshake] START")
|
1528
1369
|
cmd = f"{self.python_remote} -m fractal_server.app.runner.versions"
|
1529
1370
|
stdout = self.fractal_ssh.run_command(cmd=cmd)
|
1530
|
-
|
1371
|
+
try:
|
1372
|
+
remote_versions = json.loads(stdout.strip("\n"))
|
1373
|
+
except json.decoder.JSONDecodeError as e:
|
1374
|
+
logger.error("Fractal server versions not available")
|
1375
|
+
raise e
|
1531
1376
|
|
1532
1377
|
# Check compatibility with local versions
|
1533
1378
|
local_versions = get_versions()
|