fractal-server 2.3.0a2__py3-none-any.whl → 2.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/routes/api/v1/task_collection.py +2 -2
- fractal_server/app/routes/api/v2/__init__.py +8 -16
- fractal_server/app/routes/api/v2/submit.py +1 -1
- fractal_server/app/routes/api/v2/task_collection.py +72 -17
- fractal_server/app/routes/api/v2/task_collection_custom.py +26 -4
- fractal_server/app/runner/executors/slurm/ssh/executor.py +19 -30
- fractal_server/app/runner/task_files.py +3 -14
- fractal_server/app/runner/v2/__init__.py +5 -8
- fractal_server/app/runner/v2/_slurm_ssh/__init__.py +5 -6
- fractal_server/main.py +7 -5
- fractal_server/ssh/_fabric.py +295 -142
- fractal_server/string_tools.py +39 -0
- fractal_server/tasks/utils.py +0 -4
- fractal_server/tasks/v1/background_operations.py +2 -2
- fractal_server/tasks/v2/background_operations.py +2 -2
- fractal_server/tasks/v2/background_operations_ssh.py +36 -17
- {fractal_server-2.3.0a2.dist-info → fractal_server-2.3.1.dist-info}/METADATA +1 -1
- {fractal_server-2.3.0a2.dist-info → fractal_server-2.3.1.dist-info}/RECORD +22 -22
- fractal_server/app/routes/api/v2/task_collection_ssh.py +0 -125
- {fractal_server-2.3.0a2.dist-info → fractal_server-2.3.1.dist-info}/LICENSE +0 -0
- {fractal_server-2.3.0a2.dist-info → fractal_server-2.3.1.dist-info}/WHEEL +0 -0
- {fractal_server-2.3.0a2.dist-info → fractal_server-2.3.1.dist-info}/entry_points.txt +0 -0
fractal_server/__init__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__VERSION__ = "2.3.0a2"
|
1
|
+
__VERSION__ = "2.3.1"
|
@@ -25,8 +25,8 @@ from ....schemas.v1 import TaskCollectStatusV1
|
|
25
25
|
from ....security import current_active_user
|
26
26
|
from ....security import current_active_verified_user
|
27
27
|
from ....security import User
|
28
|
+
from fractal_server.string_tools import slugify_task_name_for_source
|
28
29
|
from fractal_server.tasks.utils import get_collection_log
|
29
|
-
from fractal_server.tasks.utils import slugify_task_name
|
30
30
|
from fractal_server.tasks.v1._TaskCollectPip import _TaskCollectPip
|
31
31
|
from fractal_server.tasks.v1.background_operations import (
|
32
32
|
background_collect_pip,
|
@@ -159,7 +159,7 @@ async def collect_tasks_pip(
|
|
159
159
|
|
160
160
|
# Check that tasks are not already in the DB
|
161
161
|
for new_task in task_pkg.package_manifest.task_list:
|
162
|
-
new_task_name_slug = slugify_task_name(new_task.name)
|
162
|
+
new_task_name_slug = slugify_task_name_for_source(new_task.name)
|
163
163
|
new_task_source = f"{task_pkg.package_source}:{new_task_name_slug}"
|
164
164
|
stm = select(Task).where(Task.source == new_task_source)
|
165
165
|
res = await db.execute(stm)
|
@@ -12,7 +12,6 @@ from .submit import router as submit_job_router_v2
|
|
12
12
|
from .task import router as task_router_v2
|
13
13
|
from .task_collection import router as task_collection_router_v2
|
14
14
|
from .task_collection_custom import router as task_collection_router_v2_custom
|
15
|
-
from .task_collection_ssh import router as task_collection_router_v2_ssh
|
16
15
|
from .task_legacy import router as task_legacy_router_v2
|
17
16
|
from .workflow import router as workflow_router_v2
|
18
17
|
from .workflowtask import router as workflowtask_router_v2
|
@@ -30,21 +29,14 @@ router_api_v2.include_router(submit_job_router_v2, tags=["V2 Job"])
|
|
30
29
|
|
31
30
|
|
32
31
|
settings = Inject(get_settings)
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
task_collection_router_v2, prefix="/task", tags=["V2 Task Collection"]
|
42
|
-
)
|
43
|
-
router_api_v2.include_router(
|
44
|
-
task_collection_router_v2_custom,
|
45
|
-
prefix="/task",
|
46
|
-
tags=["V2 Task Collection"],
|
47
|
-
)
|
32
|
+
router_api_v2.include_router(
|
33
|
+
task_collection_router_v2, prefix="/task", tags=["V2 Task Collection"]
|
34
|
+
)
|
35
|
+
router_api_v2.include_router(
|
36
|
+
task_collection_router_v2_custom,
|
37
|
+
prefix="/task",
|
38
|
+
tags=["V2 Task Collection"],
|
39
|
+
)
|
48
40
|
router_api_v2.include_router(task_router_v2, prefix="/task", tags=["V2 Task"])
|
49
41
|
router_api_v2.include_router(
|
50
42
|
task_legacy_router_v2, prefix="/task-legacy", tags=["V2 Task Legacy"]
|
@@ -246,7 +246,7 @@ async def apply_workflow(
|
|
246
246
|
worker_init=job.worker_init,
|
247
247
|
slurm_user=user.slurm_user,
|
248
248
|
user_cache_dir=user.cache_dir,
|
249
|
-
|
249
|
+
fractal_ssh=request.app.state.fractal_ssh,
|
250
250
|
)
|
251
251
|
request.app.state.jobsV2.append(job.id)
|
252
252
|
logger.info(
|
@@ -7,6 +7,7 @@ from fastapi import APIRouter
|
|
7
7
|
from fastapi import BackgroundTasks
|
8
8
|
from fastapi import Depends
|
9
9
|
from fastapi import HTTPException
|
10
|
+
from fastapi import Request
|
10
11
|
from fastapi import Response
|
11
12
|
from fastapi import status
|
12
13
|
from pydantic.error_wrappers import ValidationError
|
@@ -27,10 +28,10 @@ from ....schemas.v2 import TaskReadV2
|
|
27
28
|
from ....security import current_active_user
|
28
29
|
from ....security import current_active_verified_user
|
29
30
|
from ....security import User
|
31
|
+
from fractal_server.string_tools import slugify_task_name_for_source
|
30
32
|
from fractal_server.tasks.utils import get_absolute_venv_path
|
31
33
|
from fractal_server.tasks.utils import get_collection_log
|
32
34
|
from fractal_server.tasks.utils import get_collection_path
|
33
|
-
from fractal_server.tasks.utils import slugify_task_name
|
34
35
|
from fractal_server.tasks.v2._TaskCollectPip import _TaskCollectPip
|
35
36
|
from fractal_server.tasks.v2.background_operations import (
|
36
37
|
background_collect_pip,
|
@@ -38,6 +39,7 @@ from fractal_server.tasks.v2.background_operations import (
|
|
38
39
|
from fractal_server.tasks.v2.endpoint_operations import create_package_dir_pip
|
39
40
|
from fractal_server.tasks.v2.endpoint_operations import download_package
|
40
41
|
from fractal_server.tasks.v2.endpoint_operations import inspect_package
|
42
|
+
from fractal_server.tasks.v2.utils import get_python_interpreter_v2
|
41
43
|
|
42
44
|
|
43
45
|
router = APIRouter()
|
@@ -66,6 +68,7 @@ async def collect_tasks_pip(
|
|
66
68
|
task_collect: TaskCollectPipV2,
|
67
69
|
background_tasks: BackgroundTasks,
|
68
70
|
response: Response,
|
71
|
+
request: Request,
|
69
72
|
user: User = Depends(current_active_verified_user),
|
70
73
|
db: AsyncSession = Depends(get_async_db),
|
71
74
|
) -> CollectionStateReadV2:
|
@@ -76,17 +79,26 @@ async def collect_tasks_pip(
|
|
76
79
|
of a package and the collection of tasks as advertised in the manifest.
|
77
80
|
"""
|
78
81
|
|
79
|
-
|
82
|
+
# Get settings
|
83
|
+
settings = Inject(get_settings)
|
80
84
|
|
81
|
-
# Set
|
85
|
+
# Set/check python version
|
82
86
|
if task_collect.python_version is None:
|
83
|
-
settings = Inject(get_settings)
|
84
87
|
task_collect.python_version = (
|
85
88
|
settings.FRACTAL_TASKS_PYTHON_DEFAULT_VERSION
|
86
89
|
)
|
90
|
+
try:
|
91
|
+
get_python_interpreter_v2(python_version=task_collect.python_version)
|
92
|
+
except ValueError:
|
93
|
+
raise HTTPException(
|
94
|
+
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
95
|
+
detail=(
|
96
|
+
f"Python version {task_collect.python_version} is "
|
97
|
+
"not available for Fractal task collection."
|
98
|
+
),
|
99
|
+
)
|
87
100
|
|
88
|
-
# Validate payload
|
89
|
-
# TaskCollectPip
|
101
|
+
# Validate payload
|
90
102
|
try:
|
91
103
|
task_pkg = _TaskCollectPip(**task_collect.dict(exclude_unset=True))
|
92
104
|
except ValidationError as e:
|
@@ -95,6 +107,37 @@ async def collect_tasks_pip(
|
|
95
107
|
detail=f"Invalid task-collection object. Original error: {e}",
|
96
108
|
)
|
97
109
|
|
110
|
+
# END of SSH/non-SSH common part
|
111
|
+
|
112
|
+
if settings.FRACTAL_RUNNER_BACKEND == "slurm_ssh":
|
113
|
+
|
114
|
+
from fractal_server.tasks.v2.background_operations_ssh import (
|
115
|
+
background_collect_pip_ssh,
|
116
|
+
)
|
117
|
+
|
118
|
+
# Construct and return state
|
119
|
+
state = CollectionStateV2(
|
120
|
+
data=dict(
|
121
|
+
status=CollectionStatusV2.PENDING, package=task_collect.package
|
122
|
+
)
|
123
|
+
)
|
124
|
+
db.add(state)
|
125
|
+
await db.commit()
|
126
|
+
|
127
|
+
background_tasks.add_task(
|
128
|
+
background_collect_pip_ssh,
|
129
|
+
state.id,
|
130
|
+
task_pkg,
|
131
|
+
request.app.state.fractal_ssh,
|
132
|
+
)
|
133
|
+
|
134
|
+
response.status_code = status.HTTP_201_CREATED
|
135
|
+
return state
|
136
|
+
|
137
|
+
# Actual non-SSH endpoint
|
138
|
+
|
139
|
+
logger = set_logger(logger_name="collect_tasks_pip")
|
140
|
+
|
98
141
|
with TemporaryDirectory() as tmpdir:
|
99
142
|
try:
|
100
143
|
# Copy or download the package wheel file to tmpdir
|
@@ -197,7 +240,7 @@ async def collect_tasks_pip(
|
|
197
240
|
|
198
241
|
# Check that tasks are not already in the DB
|
199
242
|
for new_task in task_pkg.package_manifest.task_list:
|
200
|
-
new_task_name_slug = slugify_task_name(new_task.name)
|
243
|
+
new_task_name_slug = slugify_task_name_for_source(new_task.name)
|
201
244
|
new_task_source = f"{task_pkg.package_source}:{new_task_name_slug}"
|
202
245
|
stm = select(TaskV2).where(TaskV2.source == new_task_source)
|
203
246
|
res = await db.execute(stm)
|
@@ -253,6 +296,7 @@ async def check_collection_status(
|
|
253
296
|
"""
|
254
297
|
Check status of background task collection
|
255
298
|
"""
|
299
|
+
|
256
300
|
logger = set_logger(logger_name="check_collection_status")
|
257
301
|
logger.debug(f"Querying state for state.id={state_id}")
|
258
302
|
state = await db.get(CollectionStateV2, state_id)
|
@@ -263,17 +307,28 @@ async def check_collection_status(
|
|
263
307
|
detail=f"No task collection info with id={state_id}",
|
264
308
|
)
|
265
309
|
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
310
|
+
settings = Inject(get_settings)
|
311
|
+
if settings.FRACTAL_RUNNER_BACKEND == "slurm_ssh":
|
312
|
+
# FIXME SSH: add logic for when data.state["log"] is empty
|
313
|
+
pass
|
314
|
+
else:
|
315
|
+
# Non-SSH mode
|
316
|
+
# In some cases (i.e. a successful or ongoing task collection),
|
317
|
+
# state.data.log is not set; if so, we collect the current logs.
|
318
|
+
if verbose and not state.data.get("log"):
|
319
|
+
if "venv_path" not in state.data.keys():
|
320
|
+
await db.close()
|
321
|
+
raise HTTPException(
|
322
|
+
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
323
|
+
detail=(
|
324
|
+
f"No 'venv_path' in CollectionStateV2[{state_id}].data"
|
325
|
+
),
|
326
|
+
)
|
327
|
+
state.data["log"] = get_collection_log(
|
328
|
+
Path(state.data["venv_path"])
|
274
329
|
)
|
275
|
-
|
276
|
-
|
330
|
+
state.data["venv_path"] = str(state.data["venv_path"])
|
331
|
+
|
277
332
|
reset_logger_handlers(logger)
|
278
333
|
await db.close()
|
279
334
|
return state
|
@@ -42,13 +42,35 @@ async def collect_task_custom(
|
|
42
42
|
|
43
43
|
settings = Inject(get_settings)
|
44
44
|
|
45
|
-
if
|
46
|
-
|
47
|
-
if settings.FRACTAL_RUNNER_BACKEND == "slurm_ssh":
|
45
|
+
if settings.FRACTAL_RUNNER_BACKEND == "slurm_ssh":
|
46
|
+
if task_collect.package_root is None:
|
48
47
|
raise HTTPException(
|
49
48
|
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
50
49
|
detail="Cannot infer 'package_root' with 'slurm_ssh' backend.",
|
51
50
|
)
|
51
|
+
else:
|
52
|
+
if not Path(task_collect.python_interpreter).is_file():
|
53
|
+
raise HTTPException(
|
54
|
+
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
55
|
+
detail=(
|
56
|
+
f"{task_collect.python_interpreter=} "
|
57
|
+
"doesn't exist or is not a file."
|
58
|
+
),
|
59
|
+
)
|
60
|
+
if (
|
61
|
+
task_collect.package_root is not None
|
62
|
+
and not Path(task_collect.package_root).is_dir()
|
63
|
+
):
|
64
|
+
raise HTTPException(
|
65
|
+
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
66
|
+
detail=(
|
67
|
+
f"{task_collect.package_root=} "
|
68
|
+
"doesn't exist or is not a directory."
|
69
|
+
),
|
70
|
+
)
|
71
|
+
|
72
|
+
if task_collect.package_root is None:
|
73
|
+
|
52
74
|
package_name_underscore = task_collect.package_name.replace("-", "_")
|
53
75
|
# Note that python_command is then used as part of a subprocess.run
|
54
76
|
# statement: be careful with mixing `'` and `"`.
|
@@ -61,7 +83,7 @@ async def collect_task_custom(
|
|
61
83
|
)
|
62
84
|
logger.debug(
|
63
85
|
f"Now running {python_command=} through "
|
64
|
-
"{task_collect.python_interpreter}."
|
86
|
+
f"{task_collect.python_interpreter}."
|
65
87
|
)
|
66
88
|
res = subprocess.run( # nosec
|
67
89
|
shlex.split(
|
@@ -27,7 +27,6 @@ from typing import Sequence
|
|
27
27
|
|
28
28
|
import cloudpickle
|
29
29
|
from cfut import SlurmExecutor
|
30
|
-
from fabric.connection import Connection
|
31
30
|
from paramiko.ssh_exception import NoValidConnectionsError
|
32
31
|
|
33
32
|
from ....filenames import SHUTDOWN_FILENAME
|
@@ -44,8 +43,7 @@ from fractal_server.app.runner.exceptions import TaskExecutionError
|
|
44
43
|
from fractal_server.app.runner.executors.slurm.ssh._slurm_job import SlurmJob
|
45
44
|
from fractal_server.config import get_settings
|
46
45
|
from fractal_server.logger import set_logger
|
47
|
-
from fractal_server.ssh._fabric import
|
48
|
-
from fractal_server.ssh._fabric import run_command_over_ssh
|
46
|
+
from fractal_server.ssh._fabric import FractalSSH
|
49
47
|
from fractal_server.syringe import Inject
|
50
48
|
|
51
49
|
logger = set_logger(__name__)
|
@@ -58,7 +56,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
58
56
|
FIXME: docstring
|
59
57
|
|
60
58
|
Attributes:
|
61
|
-
|
59
|
+
fractal_ssh: FractalSSH connection with custom lock
|
62
60
|
shutdown_file:
|
63
61
|
python_remote: Equal to `settings.FRACTAL_SLURM_WORKER_PYTHON`
|
64
62
|
wait_thread_cls: Class for waiting thread
|
@@ -76,7 +74,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
76
74
|
Dictionary with paths of slurm-related files for active jobs
|
77
75
|
"""
|
78
76
|
|
79
|
-
|
77
|
+
fractal_ssh: FractalSSH
|
80
78
|
|
81
79
|
workflow_dir_local: Path
|
82
80
|
workflow_dir_remote: Path
|
@@ -95,8 +93,8 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
95
93
|
def __init__(
|
96
94
|
self,
|
97
95
|
*,
|
98
|
-
#
|
99
|
-
|
96
|
+
# FractalSSH connection
|
97
|
+
fractal_ssh: FractalSSH,
|
100
98
|
# Folders and files
|
101
99
|
workflow_dir_local: Path,
|
102
100
|
workflow_dir_remote: Path,
|
@@ -117,7 +115,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
117
115
|
relevant bits of `cfut.ClusterExecutor.__init__`.
|
118
116
|
|
119
117
|
Args:
|
120
|
-
|
118
|
+
fractal_ssh:
|
121
119
|
workflow_dir_local:
|
122
120
|
workflow_dir_remote:
|
123
121
|
keep_pickle_files:
|
@@ -167,8 +165,8 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
167
165
|
raise ValueError("FRACTAL_SLURM_WORKER_PYTHON is not set. Exit.")
|
168
166
|
|
169
167
|
# Initialize connection and perform handshake
|
170
|
-
self.
|
171
|
-
logger.warning(self.
|
168
|
+
self.fractal_ssh = fractal_ssh
|
169
|
+
logger.warning(self.fractal_ssh)
|
172
170
|
self.handshake()
|
173
171
|
|
174
172
|
# Set/validate parameters for SLURM submission scripts
|
@@ -838,7 +836,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
838
836
|
|
839
837
|
# Transfer archive
|
840
838
|
t_0_put = time.perf_counter()
|
841
|
-
self.
|
839
|
+
self.fractal_ssh.put(
|
842
840
|
local=tarfile_path_local,
|
843
841
|
remote=tarfile_path_remote,
|
844
842
|
)
|
@@ -853,7 +851,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
853
851
|
"fractal_server.app.runner.extract_archive "
|
854
852
|
f"{tarfile_path_remote}"
|
855
853
|
)
|
856
|
-
|
854
|
+
self.fractal_ssh.run_command(cmd=tar_command)
|
857
855
|
|
858
856
|
# Remove local version
|
859
857
|
t_0_rm = time.perf_counter()
|
@@ -875,9 +873,8 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
875
873
|
|
876
874
|
# Submit job to SLURM, and get jobid
|
877
875
|
sbatch_command = f"sbatch --parsable {job.slurm_script_remote}"
|
878
|
-
sbatch_stdout = run_command_over_ssh(
|
876
|
+
sbatch_stdout = self.fractal_ssh.run_command(
|
879
877
|
cmd=sbatch_command,
|
880
|
-
connection=self.connection,
|
881
878
|
)
|
882
879
|
|
883
880
|
# Extract SLURM job ID from stdout
|
@@ -1216,9 +1213,9 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
1216
1213
|
|
1217
1214
|
# Remove remote tarfile - FIXME SSH: is this needed?
|
1218
1215
|
# rm_command = f"rm {tarfile_path_remote}"
|
1219
|
-
# _run_command_over_ssh(cmd=rm_command,
|
1216
|
+
# _run_command_over_ssh(cmd=rm_command, fractal_ssh=self.fractal_ssh)
|
1220
1217
|
logger.warning(f"Unlink {tarfile_path_remote=} - START")
|
1221
|
-
self.
|
1218
|
+
self.fractal_ssh.sftp().unlink(tarfile_path_remote)
|
1222
1219
|
logger.warning(f"Unlink {tarfile_path_remote=} - STOP")
|
1223
1220
|
|
1224
1221
|
# Create remote tarfile
|
@@ -1227,14 +1224,12 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
1227
1224
|
"-m fractal_server.app.runner.compress_folder "
|
1228
1225
|
f"{(self.workflow_dir_remote / subfolder_name).as_posix()}"
|
1229
1226
|
)
|
1230
|
-
stdout = run_command_over_ssh(
|
1231
|
-
cmd=tar_command, connection=self.connection
|
1232
|
-
)
|
1227
|
+
stdout = self.fractal_ssh.run_command(cmd=tar_command)
|
1233
1228
|
print(stdout)
|
1234
1229
|
|
1235
1230
|
# Fetch tarfile
|
1236
1231
|
t_0_get = time.perf_counter()
|
1237
|
-
self.
|
1232
|
+
self.fractal_ssh.get(
|
1238
1233
|
remote=tarfile_path_remote,
|
1239
1234
|
local=tarfile_path_local,
|
1240
1235
|
)
|
@@ -1331,7 +1326,6 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
1331
1326
|
"""
|
1332
1327
|
|
1333
1328
|
logger.debug("Executor shutdown: start")
|
1334
|
-
# self.connection.close()
|
1335
1329
|
|
1336
1330
|
# Handle all job futures
|
1337
1331
|
slurm_jobs_to_scancel = []
|
@@ -1354,9 +1348,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
1354
1348
|
scancel_string = " ".join(slurm_jobs_to_scancel)
|
1355
1349
|
logger.warning(f"Now scancel-ing SLURM jobs {scancel_string}")
|
1356
1350
|
scancel_command = f"scancel {scancel_string}"
|
1357
|
-
|
1358
|
-
cmd=scancel_command, connection=self.connection
|
1359
|
-
)
|
1351
|
+
self.fractal_ssh.run_command(cmd=scancel_command)
|
1360
1352
|
logger.debug("Executor shutdown: end")
|
1361
1353
|
|
1362
1354
|
def __exit__(self, *args, **kwargs):
|
@@ -1381,10 +1373,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
1381
1373
|
)
|
1382
1374
|
job_ids = ",".join([str(j) for j in job_ids])
|
1383
1375
|
squeue_command = squeue_command.replace("__JOBS__", job_ids)
|
1384
|
-
stdout = run_command_over_ssh(
|
1385
|
-
cmd=squeue_command,
|
1386
|
-
connection=self.connection,
|
1387
|
-
)
|
1376
|
+
stdout = self.fractal_ssh.run_command(cmd=squeue_command)
|
1388
1377
|
return stdout
|
1389
1378
|
|
1390
1379
|
def _jobs_finished(self, job_ids: list[str]) -> set[str]:
|
@@ -1458,13 +1447,13 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
1458
1447
|
FIXME SSH: We could include further checks on version matches
|
1459
1448
|
"""
|
1460
1449
|
|
1461
|
-
check_connection(
|
1450
|
+
self.fractal_ssh.check_connection()
|
1462
1451
|
|
1463
1452
|
t_start_handshake = time.perf_counter()
|
1464
1453
|
|
1465
1454
|
logger.info("[FractalSlurmSSHExecutor.ssh_handshake] START")
|
1466
1455
|
cmd = f"{self.python_remote} -m fractal_server.app.runner.versions"
|
1467
|
-
stdout =
|
1456
|
+
stdout = self.fractal_ssh.run_command(cmd=cmd)
|
1468
1457
|
remote_versions = json.loads(stdout.strip("\n"))
|
1469
1458
|
|
1470
1459
|
# Check compatibility with local versions
|
@@ -2,18 +2,7 @@ from pathlib import Path
|
|
2
2
|
from typing import Optional
|
3
3
|
from typing import Union
|
4
4
|
|
5
|
-
from fractal_server.
|
6
|
-
|
7
|
-
|
8
|
-
def sanitize_component(value: str) -> str:
|
9
|
-
"""
|
10
|
-
Remove {" ", "/", "."} form a string, e.g. going from
|
11
|
-
'plate.zarr/B/03/0' to 'plate_zarr_B_03_0'.
|
12
|
-
|
13
|
-
Args:
|
14
|
-
value: Input strig
|
15
|
-
"""
|
16
|
-
return value.replace(" ", "_").replace("/", "_").replace(".", "_")
|
5
|
+
from fractal_server.string_tools import sanitize_string
|
17
6
|
|
18
7
|
|
19
8
|
def task_subfolder_name(order: Union[int, str], task_name: str) -> str:
|
@@ -24,7 +13,7 @@ def task_subfolder_name(order: Union[int, str], task_name: str) -> str:
|
|
24
13
|
order:
|
25
14
|
task_name:
|
26
15
|
"""
|
27
|
-
task_name_slug = sanitize_component(task_name)
|
16
|
+
task_name_slug = sanitize_string(task_name)
|
28
17
|
return f"{order}_{task_name_slug}"
|
29
18
|
|
30
19
|
|
@@ -93,7 +82,7 @@ class TaskFiles:
|
|
93
82
|
self.component = component
|
94
83
|
|
95
84
|
if self.component is not None:
|
96
|
-
component_safe = sanitize_component(str(self.component))
|
85
|
+
component_safe = sanitize_string(str(self.component))
|
97
86
|
component_safe = f"_par_{component_safe}"
|
98
87
|
else:
|
99
88
|
component_safe = ""
|
@@ -11,7 +11,6 @@ import traceback
|
|
11
11
|
from pathlib import Path
|
12
12
|
from typing import Optional
|
13
13
|
|
14
|
-
from fabric import Connection # FIXME SSH: try/except import
|
15
14
|
from sqlalchemy.orm import Session as DBSyncSession
|
16
15
|
from sqlalchemy.orm.attributes import flag_modified
|
17
16
|
|
@@ -19,6 +18,7 @@ from ....config import get_settings
|
|
19
18
|
from ....logger import get_logger
|
20
19
|
from ....logger import reset_logger_handlers
|
21
20
|
from ....logger import set_logger
|
21
|
+
from ....ssh._fabric import FractalSSH
|
22
22
|
from ....syringe import Inject
|
23
23
|
from ....utils import get_timestamp
|
24
24
|
from ...db import DB
|
@@ -79,7 +79,7 @@ async def submit_workflow(
|
|
79
79
|
worker_init: Optional[str] = None,
|
80
80
|
slurm_user: Optional[str] = None,
|
81
81
|
user_cache_dir: Optional[str] = None,
|
82
|
-
|
82
|
+
fractal_ssh: Optional[FractalSSH] = None,
|
83
83
|
) -> None:
|
84
84
|
"""
|
85
85
|
Prepares a workflow and applies it to a dataset
|
@@ -189,11 +189,8 @@ async def submit_workflow(
|
|
189
189
|
/ WORKFLOW_DIR_LOCAL.name
|
190
190
|
)
|
191
191
|
# FIXME SSH: move mkdir to executor, likely within handshake
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
_mkdir_over_ssh(
|
196
|
-
folder=str(WORKFLOW_DIR_REMOTE), connection=connection
|
192
|
+
fractal_ssh.mkdir(
|
193
|
+
folder=str(WORKFLOW_DIR_REMOTE),
|
197
194
|
)
|
198
195
|
logging.info(f"Created {str(WORKFLOW_DIR_REMOTE)} via SSH.")
|
199
196
|
else:
|
@@ -299,7 +296,7 @@ async def submit_workflow(
|
|
299
296
|
)
|
300
297
|
elif FRACTAL_RUNNER_BACKEND == "slurm_ssh":
|
301
298
|
process_workflow = slurm_ssh_process_workflow
|
302
|
-
backend_specific_kwargs = dict(
|
299
|
+
backend_specific_kwargs = dict(fractal_ssh=fractal_ssh)
|
303
300
|
else:
|
304
301
|
raise RuntimeError(
|
305
302
|
f"Invalid runner backend {FRACTAL_RUNNER_BACKEND=}"
|
@@ -21,8 +21,7 @@ from typing import Any
|
|
21
21
|
from typing import Optional
|
22
22
|
from typing import Union
|
23
23
|
|
24
|
-
from
|
25
|
-
|
24
|
+
from .....ssh._fabric import FractalSSH
|
26
25
|
from ....models.v2 import DatasetV2
|
27
26
|
from ....models.v2 import WorkflowV2
|
28
27
|
from ...async_wrap import async_wrap
|
@@ -41,7 +40,7 @@ def _process_workflow(
|
|
41
40
|
workflow_dir_remote: Path,
|
42
41
|
first_task_index: int,
|
43
42
|
last_task_index: int,
|
44
|
-
|
43
|
+
fractal_ssh: FractalSSH,
|
45
44
|
worker_init: Optional[Union[str, list[str]]] = None,
|
46
45
|
) -> dict[str, Any]:
|
47
46
|
"""
|
@@ -62,7 +61,7 @@ def _process_workflow(
|
|
62
61
|
worker_init = worker_init.split("\n")
|
63
62
|
|
64
63
|
with FractalSlurmSSHExecutor(
|
65
|
-
|
64
|
+
fractal_ssh=fractal_ssh,
|
66
65
|
workflow_dir_local=workflow_dir_local,
|
67
66
|
workflow_dir_remote=workflow_dir_remote,
|
68
67
|
common_script_lines=worker_init,
|
@@ -91,7 +90,7 @@ async def process_workflow(
|
|
91
90
|
last_task_index: Optional[int] = None,
|
92
91
|
logger_name: str,
|
93
92
|
# Not used
|
94
|
-
|
93
|
+
fractal_ssh: FractalSSH,
|
95
94
|
user_cache_dir: Optional[str] = None,
|
96
95
|
slurm_user: Optional[str] = None,
|
97
96
|
slurm_account: Optional[str] = None,
|
@@ -121,6 +120,6 @@ async def process_workflow(
|
|
121
120
|
first_task_index=first_task_index,
|
122
121
|
last_task_index=last_task_index,
|
123
122
|
worker_init=worker_init,
|
124
|
-
|
123
|
+
fractal_ssh=fractal_ssh,
|
125
124
|
)
|
126
125
|
return new_dataset_attributes
|
fractal_server/main.py
CHANGED
@@ -101,14 +101,16 @@ async def lifespan(app: FastAPI):
|
|
101
101
|
|
102
102
|
if settings.FRACTAL_RUNNER_BACKEND == "slurm_ssh":
|
103
103
|
from fractal_server.ssh._fabric import get_ssh_connection
|
104
|
+
from fractal_server.ssh._fabric import FractalSSH
|
104
105
|
|
105
|
-
|
106
|
+
connection = get_ssh_connection()
|
107
|
+
app.state.fractal_ssh = FractalSSH(connection=connection)
|
106
108
|
logger.info(
|
107
109
|
f"Created SSH connection "
|
108
|
-
f"({app.state.
|
110
|
+
f"({app.state.fractal_ssh.is_connected=})."
|
109
111
|
)
|
110
112
|
else:
|
111
|
-
app.state.
|
113
|
+
app.state.fractal_ssh = None
|
112
114
|
|
113
115
|
config_uvicorn_loggers()
|
114
116
|
logger.info("End application startup")
|
@@ -120,10 +122,10 @@ async def lifespan(app: FastAPI):
|
|
120
122
|
if settings.FRACTAL_RUNNER_BACKEND == "slurm_ssh":
|
121
123
|
logger.info(
|
122
124
|
f"Closing SSH connection "
|
123
|
-
f"(current: {app.state.
|
125
|
+
f"(current: {app.state.fractal_ssh.is_connected=})."
|
124
126
|
)
|
125
127
|
|
126
|
-
app.state.
|
128
|
+
app.state.fractal_ssh.close()
|
127
129
|
|
128
130
|
logger.info(
|
129
131
|
f"Current worker with pid {os.getpid()} is shutting down. "
|