fractal-server 2.3.0a3__py3-none-any.whl → 2.3.1a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/routes/api/v1/task_collection.py +2 -2
- fractal_server/app/routes/api/v2/__init__.py +8 -16
- fractal_server/app/routes/api/v2/submit.py +1 -1
- fractal_server/app/routes/api/v2/task_collection.py +72 -17
- fractal_server/app/routes/api/v2/task_collection_custom.py +26 -4
- fractal_server/app/runner/executors/slurm/ssh/executor.py +19 -30
- fractal_server/app/runner/task_files.py +3 -14
- fractal_server/app/runner/v2/__init__.py +5 -8
- fractal_server/app/runner/v2/_slurm_ssh/__init__.py +5 -6
- fractal_server/main.py +7 -5
- fractal_server/ssh/_fabric.py +295 -142
- fractal_server/string_tools.py +39 -0
- fractal_server/tasks/utils.py +0 -4
- fractal_server/tasks/v1/background_operations.py +2 -2
- fractal_server/tasks/v2/background_operations.py +2 -2
- fractal_server/tasks/v2/background_operations_ssh.py +23 -14
- {fractal_server-2.3.0a3.dist-info → fractal_server-2.3.1a0.dist-info}/METADATA +1 -1
- {fractal_server-2.3.0a3.dist-info → fractal_server-2.3.1a0.dist-info}/RECORD +22 -22
- fractal_server/app/routes/api/v2/task_collection_ssh.py +0 -125
- {fractal_server-2.3.0a3.dist-info → fractal_server-2.3.1a0.dist-info}/LICENSE +0 -0
- {fractal_server-2.3.0a3.dist-info → fractal_server-2.3.1a0.dist-info}/WHEEL +0 -0
- {fractal_server-2.3.0a3.dist-info → fractal_server-2.3.1a0.dist-info}/entry_points.txt +0 -0
fractal_server/ssh/_fabric.py
CHANGED
@@ -1,7 +1,16 @@
|
|
1
|
+
import logging
|
1
2
|
import time
|
3
|
+
from contextlib import contextmanager
|
4
|
+
from pathlib import Path
|
5
|
+
from threading import Lock
|
6
|
+
from typing import Any
|
7
|
+
from typing import Generator
|
8
|
+
from typing import Literal
|
2
9
|
from typing import Optional
|
3
10
|
|
11
|
+
import paramiko.sftp_client
|
4
12
|
from fabric import Connection
|
13
|
+
from fabric import Result
|
5
14
|
from invoke import UnexpectedExit
|
6
15
|
from paramiko.ssh_exception import NoValidConnectionsError
|
7
16
|
|
@@ -10,9 +19,293 @@ from ..logger import set_logger
|
|
10
19
|
from fractal_server.config import get_settings
|
11
20
|
from fractal_server.syringe import Inject
|
12
21
|
|
22
|
+
|
23
|
+
class FractalSSHTimeoutError(RuntimeError):
|
24
|
+
pass
|
25
|
+
|
26
|
+
|
13
27
|
logger = set_logger(__name__)
|
14
28
|
|
15
|
-
|
29
|
+
|
30
|
+
class FractalSSH(object):
|
31
|
+
|
32
|
+
"""
|
33
|
+
FIXME SSH: Fix docstring
|
34
|
+
|
35
|
+
Attributes:
|
36
|
+
_lock:
|
37
|
+
connection:
|
38
|
+
default_lock_timeout:
|
39
|
+
logger_name:
|
40
|
+
"""
|
41
|
+
|
42
|
+
_lock: Lock
|
43
|
+
_connection: Connection
|
44
|
+
default_lock_timeout: float
|
45
|
+
default_max_attempts: int
|
46
|
+
default_base_interval: float
|
47
|
+
logger_name: str
|
48
|
+
|
49
|
+
def __init__(
|
50
|
+
self,
|
51
|
+
connection: Connection,
|
52
|
+
default_timeout: float = 250,
|
53
|
+
default_max_attempts: int = 5,
|
54
|
+
default_base_interval: float = 3.0,
|
55
|
+
logger_name: str = __name__,
|
56
|
+
):
|
57
|
+
self._lock = Lock()
|
58
|
+
self._connection = connection
|
59
|
+
self.default_lock_timeout = default_timeout
|
60
|
+
self.default_base_interval = default_base_interval
|
61
|
+
self.default_max_attempts = default_max_attempts
|
62
|
+
self.logger_name = logger_name
|
63
|
+
set_logger(self.logger_name)
|
64
|
+
|
65
|
+
@contextmanager
|
66
|
+
def acquire_timeout(
|
67
|
+
self, timeout: float
|
68
|
+
) -> Generator[Literal[True], Any, None]:
|
69
|
+
self.logger.debug(f"Trying to acquire lock, with {timeout=}")
|
70
|
+
result = self._lock.acquire(timeout=timeout)
|
71
|
+
try:
|
72
|
+
if not result:
|
73
|
+
self.logger.error("Lock was *NOT* acquired.")
|
74
|
+
raise FractalSSHTimeoutError(
|
75
|
+
f"Failed to acquire lock within {timeout} seconds"
|
76
|
+
)
|
77
|
+
self.logger.debug("Lock was acquired.")
|
78
|
+
yield result
|
79
|
+
finally:
|
80
|
+
if result:
|
81
|
+
self._lock.release()
|
82
|
+
self.logger.debug("Lock was released")
|
83
|
+
|
84
|
+
@property
|
85
|
+
def is_connected(self) -> bool:
|
86
|
+
return self._connection.is_connected
|
87
|
+
|
88
|
+
@property
|
89
|
+
def logger(self) -> logging.Logger:
|
90
|
+
return get_logger(self.logger_name)
|
91
|
+
|
92
|
+
def put(
|
93
|
+
self, *args, lock_timeout: Optional[float] = None, **kwargs
|
94
|
+
) -> Result:
|
95
|
+
actual_lock_timeout = self.default_lock_timeout
|
96
|
+
if lock_timeout is not None:
|
97
|
+
actual_lock_timeout = lock_timeout
|
98
|
+
with self.acquire_timeout(timeout=actual_lock_timeout):
|
99
|
+
return self._connection.put(*args, **kwargs)
|
100
|
+
|
101
|
+
def get(
|
102
|
+
self, *args, lock_timeout: Optional[float] = None, **kwargs
|
103
|
+
) -> Result:
|
104
|
+
actual_lock_timeout = self.default_lock_timeout
|
105
|
+
if lock_timeout is not None:
|
106
|
+
actual_lock_timeout = lock_timeout
|
107
|
+
with self.acquire_timeout(timeout=actual_lock_timeout):
|
108
|
+
return self._connection.get(*args, **kwargs)
|
109
|
+
|
110
|
+
def run(
|
111
|
+
self, *args, lock_timeout: Optional[float] = None, **kwargs
|
112
|
+
) -> Any:
|
113
|
+
|
114
|
+
actual_lock_timeout = self.default_lock_timeout
|
115
|
+
if lock_timeout is not None:
|
116
|
+
actual_lock_timeout = lock_timeout
|
117
|
+
with self.acquire_timeout(timeout=actual_lock_timeout):
|
118
|
+
return self._connection.run(*args, **kwargs)
|
119
|
+
|
120
|
+
def sftp(self) -> paramiko.sftp_client.SFTPClient:
|
121
|
+
return self._connection.sftp()
|
122
|
+
|
123
|
+
def check_connection(self) -> None:
|
124
|
+
"""
|
125
|
+
Open the SSH connection and handle exceptions.
|
126
|
+
|
127
|
+
This function can be called from within other functions that use
|
128
|
+
`connection`, so that we can provide a meaningful error in case the
|
129
|
+
SSH connection cannot be opened.
|
130
|
+
"""
|
131
|
+
if not self._connection.is_connected:
|
132
|
+
try:
|
133
|
+
self._connection.open()
|
134
|
+
except Exception as e:
|
135
|
+
raise RuntimeError(
|
136
|
+
f"Cannot open SSH connection. Original error:\n{str(e)}"
|
137
|
+
)
|
138
|
+
|
139
|
+
def close(self) -> None:
|
140
|
+
return self._connection.close()
|
141
|
+
|
142
|
+
def run_command(
|
143
|
+
self,
|
144
|
+
*,
|
145
|
+
cmd: str,
|
146
|
+
max_attempts: Optional[int] = None,
|
147
|
+
base_interval: Optional[int] = None,
|
148
|
+
lock_timeout: Optional[int] = None,
|
149
|
+
) -> str:
|
150
|
+
"""
|
151
|
+
Run a command within an open SSH connection.
|
152
|
+
|
153
|
+
Args:
|
154
|
+
cmd: Command to be run
|
155
|
+
max_attempts:
|
156
|
+
base_interval:
|
157
|
+
lock_timeout:
|
158
|
+
|
159
|
+
Returns:
|
160
|
+
Standard output of the command, if successful.
|
161
|
+
"""
|
162
|
+
actual_max_attempts = self.default_max_attempts
|
163
|
+
if max_attempts is not None:
|
164
|
+
actual_max_attempts = max_attempts
|
165
|
+
|
166
|
+
actual_base_interval = self.default_base_interval
|
167
|
+
if base_interval is not None:
|
168
|
+
actual_base_interval = base_interval
|
169
|
+
|
170
|
+
actual_lock_timeout = self.default_lock_timeout
|
171
|
+
if lock_timeout is not None:
|
172
|
+
actual_lock_timeout = lock_timeout
|
173
|
+
|
174
|
+
t_0 = time.perf_counter()
|
175
|
+
ind_attempt = 0
|
176
|
+
while ind_attempt <= actual_max_attempts:
|
177
|
+
ind_attempt += 1
|
178
|
+
prefix = f"[attempt {ind_attempt}/{actual_max_attempts}]"
|
179
|
+
self.logger.info(f"{prefix} START running '{cmd}' over SSH.")
|
180
|
+
try:
|
181
|
+
# Case 1: Command runs successfully
|
182
|
+
res = self.run(
|
183
|
+
cmd, lock_timeout=actual_lock_timeout, hide=True
|
184
|
+
)
|
185
|
+
t_1 = time.perf_counter()
|
186
|
+
self.logger.info(
|
187
|
+
f"{prefix} END running '{cmd}' over SSH, "
|
188
|
+
f"elapsed {t_1-t_0:.3f}"
|
189
|
+
)
|
190
|
+
self.logger.debug(f"STDOUT: {res.stdout}")
|
191
|
+
self.logger.debug(f"STDERR: {res.stderr}")
|
192
|
+
return res.stdout
|
193
|
+
except NoValidConnectionsError as e:
|
194
|
+
# Case 2: Command fails with a connection error
|
195
|
+
self.logger.warning(
|
196
|
+
f"{prefix} Running command `{cmd}` over SSH failed.\n"
|
197
|
+
f"Original NoValidConnectionError:\n{str(e)}.\n"
|
198
|
+
f"{e.errors=}\n"
|
199
|
+
)
|
200
|
+
if ind_attempt < actual_max_attempts:
|
201
|
+
sleeptime = actual_base_interval**ind_attempt
|
202
|
+
self.logger.warning(
|
203
|
+
f"{prefix} Now sleep {sleeptime:.3f} "
|
204
|
+
"seconds and continue."
|
205
|
+
)
|
206
|
+
time.sleep(sleeptime)
|
207
|
+
else:
|
208
|
+
self.logger.error(f"{prefix} Reached last attempt")
|
209
|
+
break
|
210
|
+
except UnexpectedExit as e:
|
211
|
+
# Case 3: Command fails with an actual error
|
212
|
+
error_msg = (
|
213
|
+
f"{prefix} Running command `{cmd}` over SSH failed.\n"
|
214
|
+
f"Original error:\n{str(e)}."
|
215
|
+
)
|
216
|
+
self.logger.error(error_msg)
|
217
|
+
raise RuntimeError(error_msg)
|
218
|
+
except Exception as e:
|
219
|
+
self.logger.error(
|
220
|
+
f"Running command `{cmd}` over SSH failed.\n"
|
221
|
+
f"Original Error:\n{str(e)}."
|
222
|
+
)
|
223
|
+
raise e
|
224
|
+
|
225
|
+
raise RuntimeError(
|
226
|
+
f"Reached last attempt ({max_attempts=}) for running "
|
227
|
+
f"'{cmd}' over SSH"
|
228
|
+
)
|
229
|
+
|
230
|
+
def send_file(
|
231
|
+
self,
|
232
|
+
*,
|
233
|
+
local: str,
|
234
|
+
remote: str,
|
235
|
+
logger_name: Optional[str] = None,
|
236
|
+
lock_timeout: Optional[float] = None,
|
237
|
+
) -> None:
|
238
|
+
"""
|
239
|
+
Transfer a file via SSH
|
240
|
+
|
241
|
+
Args:
|
242
|
+
local: Local path to file
|
243
|
+
remote: Target path on remote host
|
244
|
+
fractal_ssh: FractalSSH connection object with custom lock
|
245
|
+
logger_name: Name of the logger
|
246
|
+
|
247
|
+
"""
|
248
|
+
try:
|
249
|
+
self.put(local=local, remote=remote, lock_timeout=lock_timeout)
|
250
|
+
except Exception as e:
|
251
|
+
logger = get_logger(logger_name=logger_name)
|
252
|
+
logger.error(
|
253
|
+
f"Transferring {local=} to {remote=} over SSH failed.\n"
|
254
|
+
f"Original Error:\n{str(e)}."
|
255
|
+
)
|
256
|
+
raise e
|
257
|
+
|
258
|
+
def mkdir(self, *, folder: str, parents: bool = True) -> None:
|
259
|
+
"""
|
260
|
+
Create a folder remotely via SSH.
|
261
|
+
|
262
|
+
Args:
|
263
|
+
folder:
|
264
|
+
fractal_ssh:
|
265
|
+
parents:
|
266
|
+
"""
|
267
|
+
# FIXME SSH: try using `mkdir` method of `paramiko.SFTPClient`
|
268
|
+
if parents:
|
269
|
+
cmd = f"mkdir -p {folder}"
|
270
|
+
else:
|
271
|
+
cmd = f"mkdir {folder}"
|
272
|
+
self.run_command(cmd=cmd)
|
273
|
+
|
274
|
+
def remove_folder(
|
275
|
+
self,
|
276
|
+
*,
|
277
|
+
folder: str,
|
278
|
+
safe_root: str,
|
279
|
+
) -> None:
|
280
|
+
"""
|
281
|
+
Removes a folder remotely via SSH.
|
282
|
+
|
283
|
+
This function calls `rm -r`, after a few checks on `folder`.
|
284
|
+
|
285
|
+
Args:
|
286
|
+
folder: Absolute path to a folder that should be removed.
|
287
|
+
safe_root: If `folder` is not a subfolder of the absolute
|
288
|
+
`safe_root` path, raise an error.
|
289
|
+
fractal_ssh:
|
290
|
+
"""
|
291
|
+
invalid_characters = {" ", "\n", ";", "$", "`"}
|
292
|
+
|
293
|
+
if (
|
294
|
+
not isinstance(folder, str)
|
295
|
+
or not isinstance(safe_root, str)
|
296
|
+
or len(invalid_characters.intersection(folder)) > 0
|
297
|
+
or len(invalid_characters.intersection(safe_root)) > 0
|
298
|
+
or not Path(folder).is_absolute()
|
299
|
+
or not Path(safe_root).is_absolute()
|
300
|
+
or not Path(folder).resolve().is_relative_to(safe_root)
|
301
|
+
):
|
302
|
+
raise ValueError(
|
303
|
+
f"{folder=} argument is invalid or it is not "
|
304
|
+
f"relative to {safe_root=}."
|
305
|
+
)
|
306
|
+
else:
|
307
|
+
cmd = f"rm -r {folder}"
|
308
|
+
self.run_command(cmd=cmd)
|
16
309
|
|
17
310
|
|
18
311
|
def get_ssh_connection(
|
@@ -44,147 +337,7 @@ def get_ssh_connection(
|
|
44
337
|
connection = Connection(
|
45
338
|
host=host,
|
46
339
|
user=user,
|
340
|
+
forward_agent=False,
|
47
341
|
connect_kwargs={"key_filename": key_filename},
|
48
342
|
)
|
49
|
-
logger.debug(f"Now created {connection=}.")
|
50
343
|
return connection
|
51
|
-
|
52
|
-
|
53
|
-
def check_connection(connection: Connection) -> None:
|
54
|
-
"""
|
55
|
-
Open the SSH connection and handle exceptions.
|
56
|
-
|
57
|
-
This function can be called from within other functions that use
|
58
|
-
`connection`, so that we can provide a meaningful error in case the
|
59
|
-
SSH connection cannot be opened.
|
60
|
-
|
61
|
-
Args:
|
62
|
-
connection: Fabric connection object
|
63
|
-
"""
|
64
|
-
if not connection.is_connected:
|
65
|
-
try:
|
66
|
-
connection.open()
|
67
|
-
except Exception as e:
|
68
|
-
raise RuntimeError(
|
69
|
-
f"Cannot open SSH connection (original error: '{str(e)}')."
|
70
|
-
)
|
71
|
-
|
72
|
-
|
73
|
-
def run_command_over_ssh(
|
74
|
-
*,
|
75
|
-
cmd: str,
|
76
|
-
connection: Connection,
|
77
|
-
max_attempts: int = MAX_ATTEMPTS,
|
78
|
-
base_interval: float = 3.0,
|
79
|
-
) -> str:
|
80
|
-
"""
|
81
|
-
Run a command within an open SSH connection.
|
82
|
-
|
83
|
-
Args:
|
84
|
-
cmd: Command to be run
|
85
|
-
connection: Fabric connection object
|
86
|
-
|
87
|
-
Returns:
|
88
|
-
Standard output of the command, if successful.
|
89
|
-
"""
|
90
|
-
t_0 = time.perf_counter()
|
91
|
-
ind_attempt = 0
|
92
|
-
while ind_attempt <= max_attempts:
|
93
|
-
ind_attempt += 1
|
94
|
-
prefix = f"[attempt {ind_attempt}/{max_attempts}]"
|
95
|
-
logger.info(f"{prefix} START running '{cmd}' over SSH.")
|
96
|
-
try:
|
97
|
-
# Case 1: Command runs successfully
|
98
|
-
res = connection.run(cmd, hide=True)
|
99
|
-
t_1 = time.perf_counter()
|
100
|
-
logger.info(
|
101
|
-
f"{prefix} END running '{cmd}' over SSH, "
|
102
|
-
f"elapsed {t_1-t_0:.3f}"
|
103
|
-
)
|
104
|
-
logger.debug(f"STDOUT: {res.stdout}")
|
105
|
-
logger.debug(f"STDERR: {res.stderr}")
|
106
|
-
return res.stdout
|
107
|
-
except NoValidConnectionsError as e:
|
108
|
-
# Case 2: Command fails with a connection error
|
109
|
-
logger.warning(
|
110
|
-
f"{prefix} Running command `{cmd}` over SSH failed.\n"
|
111
|
-
f"Original NoValidConnectionError:\n{str(e)}.\n"
|
112
|
-
f"{e.errors=}\n"
|
113
|
-
)
|
114
|
-
if ind_attempt < max_attempts:
|
115
|
-
sleeptime = (
|
116
|
-
base_interval**ind_attempt
|
117
|
-
) # FIXME SSH: add jitter?
|
118
|
-
logger.warning(
|
119
|
-
f"{prefix} Now sleep {sleeptime:.3f} seconds and continue."
|
120
|
-
)
|
121
|
-
time.sleep(sleeptime)
|
122
|
-
continue
|
123
|
-
else:
|
124
|
-
logger.error(f"{prefix} Reached last attempt")
|
125
|
-
break
|
126
|
-
except UnexpectedExit as e:
|
127
|
-
# Case 3: Command fails with an actual error
|
128
|
-
error_msg = (
|
129
|
-
f"{prefix} Running command `{cmd}` over SSH failed.\n"
|
130
|
-
f"Original error:\n{str(e)}."
|
131
|
-
)
|
132
|
-
logger.error(error_msg)
|
133
|
-
raise ValueError(error_msg)
|
134
|
-
except Exception as e:
|
135
|
-
logger.error(
|
136
|
-
f"Running command `{cmd}` over SSH failed.\n"
|
137
|
-
f"Original Error:\n{str(e)}."
|
138
|
-
)
|
139
|
-
raise e
|
140
|
-
|
141
|
-
raise ValueError(
|
142
|
-
f"Reached last attempt ({max_attempts=}) for running '{cmd}' over SSH"
|
143
|
-
)
|
144
|
-
|
145
|
-
|
146
|
-
def put_over_ssh(
|
147
|
-
*,
|
148
|
-
local: str,
|
149
|
-
remote: str,
|
150
|
-
connection: Connection,
|
151
|
-
logger_name: Optional[str] = None,
|
152
|
-
) -> None:
|
153
|
-
"""
|
154
|
-
Transfer a file via SSH
|
155
|
-
|
156
|
-
Args:
|
157
|
-
local: Local path to file
|
158
|
-
remote: Target path on remote host
|
159
|
-
connection: Fabric connection object
|
160
|
-
logger_name: Name of the logger
|
161
|
-
|
162
|
-
"""
|
163
|
-
try:
|
164
|
-
connection.put(local=local, remote=remote)
|
165
|
-
except Exception as e:
|
166
|
-
logger = get_logger(logger_name=logger_name)
|
167
|
-
logger.error(
|
168
|
-
f"Transferring {local=} to {remote=} over SSH failed.\n"
|
169
|
-
f"Original Error:\n{str(e)}."
|
170
|
-
)
|
171
|
-
raise e
|
172
|
-
|
173
|
-
|
174
|
-
def _mkdir_over_ssh(
|
175
|
-
*, folder: str, connection: Connection, parents: bool = True
|
176
|
-
) -> None:
|
177
|
-
"""
|
178
|
-
Create a folder remotely via SSH.
|
179
|
-
|
180
|
-
Args:
|
181
|
-
folder:
|
182
|
-
connection:
|
183
|
-
parents:
|
184
|
-
"""
|
185
|
-
# FIXME SSH: try using `mkdir` method of `paramiko.SFTPClient`
|
186
|
-
if parents:
|
187
|
-
cmd = f"mkdir -p {folder}"
|
188
|
-
else:
|
189
|
-
cmd = f"mkdir {folder}"
|
190
|
-
run_command_over_ssh(cmd=cmd, connection=connection)
|
@@ -0,0 +1,39 @@
|
|
1
|
+
import string
|
2
|
+
|
3
|
+
__SPECIAL_CHARACTERS__ = f"{string.punctuation}{string.whitespace}"
|
4
|
+
|
5
|
+
|
6
|
+
def sanitize_string(value: str) -> str:
|
7
|
+
"""
|
8
|
+
Make string safe to be used in file/folder names and subprocess commands.
|
9
|
+
|
10
|
+
Make the string lower-case, and replace any special character with an
|
11
|
+
underscore, where special characters are:
|
12
|
+
```python repl
|
13
|
+
>>> string.punctuation
|
14
|
+
'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
|
15
|
+
>>> string.whitespace
|
16
|
+
' \t\n\r\x0b\x0c'
|
17
|
+
```
|
18
|
+
|
19
|
+
Args:
|
20
|
+
value: Input string
|
21
|
+
|
22
|
+
Returns:
|
23
|
+
Sanitized value
|
24
|
+
"""
|
25
|
+
new_value = value.lower()
|
26
|
+
for character in __SPECIAL_CHARACTERS__:
|
27
|
+
new_value = new_value.replace(character, "_")
|
28
|
+
return new_value
|
29
|
+
|
30
|
+
|
31
|
+
def slugify_task_name_for_source(task_name: str) -> str:
|
32
|
+
"""
|
33
|
+
NOTE: this function is used upon creation of tasks' sources, therefore
|
34
|
+
for the moment we cannot replace it with its more comprehensive version
|
35
|
+
from `fractal_server.string_tools.sanitize_string`, nor can we remove it.
|
36
|
+
As of 2.3.1, we are renaming it to `slugify_task_name_for_source`, to make
|
37
|
+
it clear that it should not be used for other purposes.
|
38
|
+
"""
|
39
|
+
return task_name.replace(" ", "_").lower()
|
fractal_server/tasks/utils.py
CHANGED
@@ -9,10 +9,6 @@ COLLECTION_LOG_FILENAME = "collection.log"
|
|
9
9
|
COLLECTION_FREEZE_FILENAME = "collection_freeze.txt"
|
10
10
|
|
11
11
|
|
12
|
-
def slugify_task_name(task_name: str) -> str:
|
13
|
-
return task_name.replace(" ", "_").lower()
|
14
|
-
|
15
|
-
|
16
12
|
def get_absolute_venv_path(venv_path: Path) -> Path:
|
17
13
|
"""
|
18
14
|
If a path is not absolute, make it a relative path of FRACTAL_TASKS_DIR.
|
@@ -6,11 +6,11 @@ import json
|
|
6
6
|
from pathlib import Path
|
7
7
|
from shutil import rmtree as shell_rmtree
|
8
8
|
|
9
|
+
from ...string_tools import slugify_task_name_for_source
|
9
10
|
from ..utils import _normalize_package_name
|
10
11
|
from ..utils import get_collection_log
|
11
12
|
from ..utils import get_collection_path
|
12
13
|
from ..utils import get_log_path
|
13
|
-
from ..utils import slugify_task_name
|
14
14
|
from ._TaskCollectPip import _TaskCollectPip
|
15
15
|
from .utils import _init_venv_v1
|
16
16
|
from fractal_server.app.db import DBSyncSession
|
@@ -215,7 +215,7 @@ async def create_package_environment_pip(
|
|
215
215
|
# Fill in attributes for TaskCreate
|
216
216
|
task_executable = package_root / t.executable
|
217
217
|
cmd = f"{python_bin.as_posix()} {task_executable.as_posix()}"
|
218
|
-
task_name_slug = slugify_task_name(t.name)
|
218
|
+
task_name_slug = slugify_task_name_for_source(t.name)
|
219
219
|
task_source = f"{task_pkg.package_source}:{task_name_slug}"
|
220
220
|
if not task_executable.exists():
|
221
221
|
raise FileNotFoundError(
|
@@ -10,12 +10,12 @@ from typing import Optional
|
|
10
10
|
from sqlalchemy.orm import Session as DBSyncSession
|
11
11
|
from sqlalchemy.orm.attributes import flag_modified
|
12
12
|
|
13
|
+
from ...string_tools import slugify_task_name_for_source
|
13
14
|
from ..utils import get_absolute_venv_path
|
14
15
|
from ..utils import get_collection_freeze
|
15
16
|
from ..utils import get_collection_log
|
16
17
|
from ..utils import get_collection_path
|
17
18
|
from ..utils import get_log_path
|
18
|
-
from ..utils import slugify_task_name
|
19
19
|
from ._TaskCollectPip import _TaskCollectPip
|
20
20
|
from fractal_server.app.db import get_sync_db
|
21
21
|
from fractal_server.app.models.v2 import CollectionStateV2
|
@@ -177,7 +177,7 @@ def _prepare_tasks_metadata(
|
|
177
177
|
task_attributes = {}
|
178
178
|
if package_version is not None:
|
179
179
|
task_attributes["version"] = package_version
|
180
|
-
task_name_slug = slugify_task_name(_task.name)
|
180
|
+
task_name_slug = slugify_task_name_for_source(_task.name)
|
181
181
|
task_attributes["source"] = f"{package_source}:{task_name_slug}"
|
182
182
|
if package_manifest.has_args_schemas:
|
183
183
|
task_attributes[
|
@@ -3,7 +3,6 @@ import os
|
|
3
3
|
from pathlib import Path
|
4
4
|
from tempfile import TemporaryDirectory
|
5
5
|
|
6
|
-
from fabric import Connection
|
7
6
|
from sqlalchemy.orm.attributes import flag_modified
|
8
7
|
|
9
8
|
from ...app.models.v2 import CollectionStateV2
|
@@ -18,9 +17,7 @@ from fractal_server.app.schemas.v2.manifest import ManifestV2
|
|
18
17
|
from fractal_server.config import get_settings
|
19
18
|
from fractal_server.logger import get_logger
|
20
19
|
from fractal_server.logger import set_logger
|
21
|
-
from fractal_server.ssh._fabric import check_connection
|
22
|
-
from fractal_server.ssh._fabric import put_over_ssh
|
23
|
-
from fractal_server.ssh._fabric import run_command_over_ssh
|
20
|
+
from fractal_server.ssh._fabric import FractalSSH
|
24
21
|
from fractal_server.syringe import Inject
|
25
22
|
from fractal_server.tasks.v2.utils import get_python_interpreter_v2
|
26
23
|
|
@@ -59,7 +56,7 @@ def _customize_and_run_template(
|
|
59
56
|
replacements: list[tuple[str, str]],
|
60
57
|
tmpdir: str,
|
61
58
|
logger_name: str,
|
62
|
-
|
59
|
+
fractal_ssh: FractalSSH,
|
63
60
|
) -> str:
|
64
61
|
"""
|
65
62
|
Customize one of the template bash scripts, transfer it to the remote host
|
@@ -71,7 +68,7 @@ def _customize_and_run_template(
|
|
71
68
|
replacements:
|
72
69
|
tmpdir:
|
73
70
|
logger_name:
|
74
|
-
|
71
|
+
fractal_ssh:
|
75
72
|
"""
|
76
73
|
logger = get_logger(logger_name)
|
77
74
|
logger.debug(f"_customize_and_run_template {script_filename} - START")
|
@@ -95,17 +92,15 @@ def _customize_and_run_template(
|
|
95
92
|
f"script_{abs(hash(tmpdir))}{script_filename}",
|
96
93
|
)
|
97
94
|
logger.debug(f"Now transfer {script_path_local=} over SSH.")
|
98
|
-
|
95
|
+
fractal_ssh.send_file(
|
99
96
|
local=script_path_local,
|
100
97
|
remote=script_path_remote,
|
101
|
-
connection=connection,
|
102
|
-
logger_name=logger_name,
|
103
98
|
)
|
104
99
|
|
105
100
|
# Execute script remotely
|
106
101
|
cmd = f"bash {script_path_remote}"
|
107
102
|
logger.debug(f"Now run '{cmd}' over SSH.")
|
108
|
-
stdout = run_command_over_ssh(cmd=cmd, connection=connection)
|
103
|
+
stdout = fractal_ssh.run_command(cmd=cmd)
|
109
104
|
logger.debug(f"Standard output of '{cmd}':\n{stdout}")
|
110
105
|
|
111
106
|
logger.debug(f"_customize_and_run_template {script_filename} - END")
|
@@ -115,7 +110,7 @@ def _customize_and_run_template(
|
|
115
110
|
def background_collect_pip_ssh(
|
116
111
|
state_id: int,
|
117
112
|
task_pkg: _TaskCollectPip,
|
118
|
-
|
113
|
+
fractal_ssh: FractalSSH,
|
119
114
|
) -> None:
|
120
115
|
"""
|
121
116
|
Collect a task package over SSH
|
@@ -127,6 +122,7 @@ def background_collect_pip_ssh(
|
|
127
122
|
starlette/fastapi handling of background tasks (see
|
128
123
|
https://github.com/encode/starlette/blob/master/starlette/background.py).
|
129
124
|
"""
|
125
|
+
|
130
126
|
# Work within a temporary folder, where also logs will be placed
|
131
127
|
with TemporaryDirectory() as tmpdir:
|
132
128
|
LOGGER_NAME = "task_collection_ssh"
|
@@ -185,10 +181,10 @@ def background_collect_pip_ssh(
|
|
185
181
|
replacements=replacements,
|
186
182
|
tmpdir=tmpdir,
|
187
183
|
logger_name=LOGGER_NAME,
|
188
|
-
|
184
|
+
fractal_ssh=fractal_ssh,
|
189
185
|
)
|
190
186
|
|
191
|
-
check_connection(connection)
|
187
|
+
fractal_ssh.check_connection()
|
192
188
|
|
193
189
|
logger.debug("installing - START")
|
194
190
|
_set_collection_state_data_status(
|
@@ -269,7 +265,7 @@ def background_collect_pip_ssh(
|
|
269
265
|
).as_posix()
|
270
266
|
|
271
267
|
# Read and validate remote manifest file
|
272
|
-
with connection.sftp().open(manifest_path_remote, "r") as f:
|
268
|
+
with fractal_ssh.sftp().open(manifest_path_remote, "r") as f:
|
273
269
|
manifest = json.load(f)
|
274
270
|
logger.info(f"collecting - loaded {manifest_path_remote=}")
|
275
271
|
ManifestV2(**manifest)
|
@@ -308,6 +304,7 @@ def background_collect_pip_ssh(
|
|
308
304
|
logger.debug("END")
|
309
305
|
|
310
306
|
except Exception as e:
|
307
|
+
# Delete corrupted package dir
|
311
308
|
_handle_failure(
|
312
309
|
state_id=state_id,
|
313
310
|
log_file_path=log_file_path,
|
@@ -315,4 +312,16 @@ def background_collect_pip_ssh(
|
|
315
312
|
exception=e,
|
316
313
|
db=db,
|
317
314
|
)
|
315
|
+
try:
|
316
|
+
logger.info(f"Now delete remote folder {package_env_dir}")
|
317
|
+
fractal_ssh.remove_folder(
|
318
|
+
folder=package_env_dir,
|
319
|
+
safe_root=settings.FRACTAL_SLURM_SSH_WORKING_BASE_DIR,
|
320
|
+
)
|
321
|
+
logger.info(f"Deleted remoted folder {package_env_dir}")
|
322
|
+
except Exception as e:
|
323
|
+
logger.error(
|
324
|
+
f"Deleting remote folder failed.\n"
|
325
|
+
f"Original error:\n{str(e)}"
|
326
|
+
)
|
318
327
|
return
|