fractal-server 2.7.0a9__py3-none-any.whl → 2.7.0a11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- fractal_server/__init__.py +1 -1
- fractal_server/app/routes/api/v2/workflow_import.py +4 -1
- fractal_server/app/runner/executors/slurm/ssh/executor.py +82 -63
- fractal_server/ssh/_fabric.py +186 -73
- fractal_server/tasks/v2/background_operations_ssh.py +15 -9
- {fractal_server-2.7.0a9.dist-info → fractal_server-2.7.0a11.dist-info}/METADATA +1 -1
- {fractal_server-2.7.0a9.dist-info → fractal_server-2.7.0a11.dist-info}/RECORD +10 -10
- {fractal_server-2.7.0a9.dist-info → fractal_server-2.7.0a11.dist-info}/LICENSE +0 -0
- {fractal_server-2.7.0a9.dist-info → fractal_server-2.7.0a11.dist-info}/WHEEL +0 -0
- {fractal_server-2.7.0a9.dist-info → fractal_server-2.7.0a11.dist-info}/entry_points.txt +0 -0
fractal_server/__init__.py
CHANGED
@@ -1 +1 @@
-__VERSION__ = "2.7.0a9"
+__VERSION__ = "2.7.0a11"
fractal_server/app/routes/api/v2/workflow_import.py
CHANGED
@@ -241,7 +241,10 @@ async def _get_task_by_taskimport(
             "Found many task groups, after filtering by version."
         )
         final_task_group = await _disambiguate_task_groups(
-            matching_task_groups,
+            matching_task_groups=matching_task_groups,
+            user_id=user_id,
+            db=db,
+            default_group_id=default_group_id,
         )
         if final_task_group is None:
             logger.info(
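Note: the call above now passes every argument to `_disambiguate_task_groups` by keyword. A minimal sketch of the pattern (the keyword-only signature below is illustrative, not copied from fractal-server):

    # With keyword-only parameters (the bare `*`), callers cannot
    # silently pass same-typed arguments in the wrong positional order.
    async def _disambiguate_task_groups_sketch(
        *,
        matching_task_groups: list,
        user_id: int,
        db: object,
        default_group_id: int,
    ) -> object | None:
        ...  # selection logic lives here in the real function

Call sites must then name each argument explicitly, exactly as in the diff above.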
fractal_server/app/runner/executors/slurm/ssh/executor.py
CHANGED
@@ -861,7 +861,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):

         # Transfer archive
         t_0_put = time.perf_counter()
-        self.fractal_ssh.
+        self.fractal_ssh.send_file(
             local=tarfile_path_local,
             remote=tarfile_path_remote,
         )
@@ -1055,55 +1055,59 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
         Arguments:
             jobid: ID of the SLURM job
         """
-
-        # Loop over all job_ids, and fetch future and job objects
-        futures: list[Future] = []
-        jobs: list[SlurmJob] = []
-        with self.jobs_lock:
-            for job_id in job_ids:
-                future, job = self.jobs.pop(job_id)
-                futures.append(future)
-                jobs.append(job)
-            if not self.jobs:
-                self.jobs_empty_cond.notify_all()
-
-        # Fetch subfolder from remote host
+        # Handle all uncaught exceptions in this broad try/except block
         try:
-            self._get_subfolder_sftp(jobs=jobs)
-        except NoValidConnectionsError as e:
-            logger.error("NoValidConnectionError")
-            logger.error(f"{str(e)=}")
-            logger.error(f"{e.errors=}")
-            for err in e.errors:
-                logger.error(f"{str(err)}")
-
-            raise e
-
-        # First round of checking whether all output files exist
-        missing_out_paths = []
-        for job in jobs:
-            for ind_out_path, out_path in enumerate(
-                job.output_pickle_files_local
-            ):
-                if not out_path.exists():
-                    missing_out_paths.append(out_path)
-        num_missing = len(missing_out_paths)
-        if num_missing > 0:
-            # Output pickle files may be missing e.g. because of some slow
-            # filesystem operation; wait some time before re-trying
-            settings = Inject(get_settings)
-            sleep_time = settings.FRACTAL_SLURM_ERROR_HANDLING_INTERVAL
             logger.info(
-                f"
-                f"sleep {sleep_time} seconds."
+                f"[FractalSlurmSSHExecutor._completion] START, for {job_ids=}."
             )
-            for missing_file in missing_out_paths:
-                logger.debug(f"Missing output pickle file: {missing_file}")
-            time.sleep(sleep_time)

-
-
+            # Loop over all job_ids, and fetch future and job objects
+            futures: list[Future] = []
+            jobs: list[SlurmJob] = []
+            with self.jobs_lock:
+                for job_id in job_ids:
+                    future, job = self.jobs.pop(job_id)
+                    futures.append(future)
+                    jobs.append(job)
+                if not self.jobs:
+                    self.jobs_empty_cond.notify_all()
+
+            # Fetch subfolder from remote host
             try:
+                self._get_subfolder_sftp(jobs=jobs)
+            except NoValidConnectionsError as e:
+                logger.error("NoValidConnectionError")
+                logger.error(f"{str(e)=}")
+                logger.error(f"{e.errors=}")
+                for err in e.errors:
+                    logger.error(f"{str(err)}")
+
+                raise e
+
+            # First round of checking whether all output files exist
+            missing_out_paths = []
+            for job in jobs:
+                for ind_out_path, out_path in enumerate(
+                    job.output_pickle_files_local
+                ):
+                    if not out_path.exists():
+                        missing_out_paths.append(out_path)
+            num_missing = len(missing_out_paths)
+            if num_missing > 0:
+                # Output pickle files may be missing e.g. because of some slow
+                # filesystem operation; wait some time before re-trying
+                settings = Inject(get_settings)
+                sleep_time = settings.FRACTAL_SLURM_ERROR_HANDLING_INTERVAL
+                logger.info(
+                    f"{num_missing} output pickle files are missing; "
+                    f"sleep {sleep_time} seconds."
+                )
+                for missing_file in missing_out_paths:
+                    logger.debug(f"Missing output pickle file: {missing_file}")
+                time.sleep(sleep_time)
+
+            # Handle all jobs
+            for ind_job, job_id in enumerate(job_ids):
                 # Retrieve job and future objects
                 job = jobs[ind_job]
                 future = futures[ind_job]
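Note: the rewritten `_completion` keeps a grace period for output pickle files that have not appeared yet (e.g. on a slow shared filesystem) before treating them as errors. A self-contained sketch of that wait-once-before-rechecking pattern; only the setting name `FRACTAL_SLURM_ERROR_HANDLING_INTERVAL` comes from the diff, the rest is illustrative:

    import logging
    import time
    from pathlib import Path

    def wait_for_missing_outputs(paths: list[Path], sleep_time: float) -> None:
        # Collect output files that do not exist yet, log each one,
        # then sleep once so the caller can re-check afterwards.
        missing = [path for path in paths if not path.exists()]
        if missing:
            logging.info(
                "%d output pickle files are missing; sleep %s seconds.",
                len(missing), sleep_time,
            )
            for path in missing:
                logging.debug("Missing output pickle file: %s", path)
            time.sleep(sleep_time)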
@@ -1128,6 +1132,11 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
                         remaining_futures=remaining_futures,
                         remaining_job_ids=remaining_job_ids,
                     )
+                    logger.info(
+                        "[FractalSlurmSSHExecutor._completion] END, "
+                        f"for {job_ids=}, with JobExecutionError due "
+                        f"to missing {out_path.as_posix()}."
+                    )
                     return
                 except InvalidStateError:
                     logger.warning(
@@ -1141,6 +1150,12 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
                         remaining_futures=remaining_futures,
                         remaining_job_ids=remaining_job_ids,
                     )
+                    logger.info(
+                        "[FractalSlurmSSHExecutor._completion] END, "
+                        f"for {job_ids=}, with JobExecutionError/"
+                        "InvalidStateError due to "
+                        f"missing {out_path.as_posix()}."
+                    )
                     return

             # Read the task output
@@ -1217,16 +1232,22 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
                 else:
                     future.set_result(outputs)

-
+        except Exception as e:
+            logger.warning(
+                "[FractalSlurmSSHExecutor._completion] "
+                f"An exception took place: {str(e)}."
+            )
+            for future in futures:
                 try:
+                    logger.info(f"Set exception for {future=}")
                     future.set_exception(e)
-                    return
                 except InvalidStateError:
-                    logger.
-
-
-
-
+                    logger.info(f"Future {future} was already cancelled.")
+            logger.info(
+                f"[FractalSlurmSSHExecutor._completion] END, for {job_ids=}, "
+                "from within exception handling."
+            )
+            return

     def _get_subfolder_sftp(self, jobs: list[SlurmJob]) -> None:
         """
@@ -1255,16 +1276,9 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
             self.workflow_dir_remote / f"{subfolder_name}.tar.gz"
         ).as_posix()

-        # Remove
-
-
-
-        # Remove remote tarfile - FIXME SSH: is this needed?
-        # rm_command = f"rm {tarfile_path_remote}"
-        # _run_command_over_ssh(cmd=rm_command, fractal_ssh=self.fractal_ssh)
-        logger.warning(f"Unlink {tarfile_path_remote=} - START")
-        self.fractal_ssh.sftp().unlink(tarfile_path_remote)
-        logger.warning(f"Unlink {tarfile_path_remote=} - STOP")
+        # Remove remote tarfile
+        rm_command = f"rm {tarfile_path_remote}"
+        self.fractal_ssh.run_command(cmd=rm_command)

         # Create remote tarfile
         tar_command = (
@@ -1278,7 +1292,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):

         # Fetch tarfile
         t_0_get = time.perf_counter()
-        self.fractal_ssh.
+        self.fractal_ssh.fetch_file(
             remote=tarfile_path_remote,
             local=tarfile_path_local,
         )
@@ -1291,6 +1305,11 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
         # Extract tarfile locally
         extract_archive(Path(tarfile_path_local))

+        # Remove local tarfile
+        if Path(tarfile_path_local).exists():
+            logger.warning(f"Remove existing file {tarfile_path_local}.")
+            Path(tarfile_path_local).unlink()
+
         t_1 = time.perf_counter()
         logger.info("[_get_subfolder_sftp] End - " f"elapsed: {t_1-t_0:.3f} s")
fractal_server/ssh/_fabric.py
CHANGED
@@ -1,3 +1,4 @@
+import json
 import logging
 import time
 from contextlib import contextmanager
@@ -23,21 +24,58 @@ class FractalSSHTimeoutError(RuntimeError):
     pass


-
-    pass
+logger = set_logger(__name__)


-
+@contextmanager
+def _acquire_lock_with_timeout(
+    lock: Lock,
+    label: str,
+    timeout: float,
+    logger_name: str = __name__,
+) -> Generator[Literal[True], Any, None]:
+    """
+    Given a `threading.Lock` object, try to acquire it within a given timeout.
+
+    Arguments:
+        lock:
+        label:
+        timeout:
+        logger_name:
+    """
+    logger = get_logger(logger_name)
+    logger.info(f"Trying to acquire lock for '{label}', with {timeout=}")
+    result = lock.acquire(timeout=timeout)
+    try:
+        if not result:
+            logger.error(f"Lock for '{label}' was *not* acquired.")
+            raise FractalSSHTimeoutError(
+                f"Failed to acquire lock for '{label}' within "
+                f"{timeout} seconds"
+            )
+        logger.info(f"Lock for '{label}' was acquired.")
+        yield result
+    finally:
+        if result:
+            lock.release()
+            logger.info(f"Lock for '{label}' was released.")


 class FractalSSH(object):
     """
-
+    Wrapper of `fabric.Connection` object, enriched with locks.
+
+    Note: methods marked as `_unsafe` should not be used directly,
+    since they do not enforce locking.

     Attributes:
         _lock:
-
+        _connection:
         default_lock_timeout:
+        default_max_attempts:
+        default_base_interval:
+        sftp_get_prefetch:
+        sftp_get_max_requests:
         logger_name:
     """
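Note: `_acquire_lock_with_timeout` (added above) turns `Lock.acquire(timeout=...)` into a context manager that raises instead of blocking forever, and releases the lock only if it was actually acquired. A minimal standard-library sketch of the same idea:

    import threading
    from contextlib import contextmanager

    class LockTimeoutError(RuntimeError):
        # Stand-in for FractalSSHTimeoutError.
        pass

    @contextmanager
    def acquire_lock_with_timeout(lock: threading.Lock, label: str, timeout: float):
        # Lock.acquire(timeout=...) returns False on timeout rather
        # than blocking indefinitely.
        acquired = lock.acquire(timeout=timeout)
        try:
            if not acquired:
                raise LockTimeoutError(
                    f"Failed to acquire lock for '{label}' within {timeout} seconds"
                )
            yield acquired
        finally:
            # Release only if the acquire succeeded.
            if acquired:
                lock.release()

    lock = threading.Lock()
    with acquire_lock_with_timeout(lock, label="sftp", timeout=5.0):
        pass  # critical section, e.g. a single SFTP operation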
@@ -46,6 +84,8 @@ class FractalSSH(object):
     default_lock_timeout: float
     default_max_attempts: int
     default_base_interval: float
+    sftp_get_prefetch: bool
+    sftp_get_max_requests: int
     logger_name: str

     def __init__(
@@ -54,6 +94,8 @@ class FractalSSH(object):
         default_timeout: float = 250,
         default_max_attempts: int = 5,
         default_base_interval: float = 3.0,
+        sftp_get_prefetch: bool = False,
+        sftp_get_max_requests: int = 64,
         logger_name: str = __name__,
     ):
         self._lock = Lock()
@@ -61,28 +103,11 @@ class FractalSSH(object):
         self.default_lock_timeout = default_timeout
         self.default_base_interval = default_base_interval
         self.default_max_attempts = default_max_attempts
+        self.sftp_get_prefetch = sftp_get_prefetch
+        self.sftp_get_max_requests = sftp_get_max_requests
         self.logger_name = logger_name
         set_logger(self.logger_name)

-    @contextmanager
-    def acquire_timeout(
-        self, timeout: float
-    ) -> Generator[Literal[True], Any, None]:
-        self.logger.debug(f"Trying to acquire lock, with {timeout=}")
-        result = self._lock.acquire(timeout=timeout)
-        try:
-            if not result:
-                self.logger.error("Lock was *NOT* acquired.")
-                raise FractalSSHTimeoutError(
-                    f"Failed to acquire lock within {timeout} seconds"
-                )
-            self.logger.debug("Lock was acquired.")
-            yield result
-        finally:
-            if result:
-                self._lock.release()
-                self.logger.debug("Lock was released")
-
     @property
     def is_connected(self) -> bool:
         return self._connection.is_connected
@@ -91,36 +116,82 @@ class FractalSSH(object):
     def logger(self) -> logging.Logger:
         return get_logger(self.logger_name)

-    def
-        self,
+    def _put(
+        self,
+        *,
+        local: str,
+        remote: str,
+        label: str,
+        lock_timeout: Optional[float] = None,
     ) -> Result:
+        """
+        Transfer a local file to a remote path, via SFTP.
+        """
         actual_lock_timeout = self.default_lock_timeout
         if lock_timeout is not None:
             actual_lock_timeout = lock_timeout
-        with
-
+        with _acquire_lock_with_timeout(
+            lock=self._lock,
+            label=label,
+            timeout=actual_lock_timeout,
+        ):
+            return self._sftp_unsafe().put(local, remote)

-    def
-        self,
+    def _get(
+        self,
+        *,
+        local: str,
+        remote: str,
+        label: str,
+        lock_timeout: Optional[float] = None,
     ) -> Result:
         actual_lock_timeout = self.default_lock_timeout
         if lock_timeout is not None:
             actual_lock_timeout = lock_timeout
-        with
-
+        with _acquire_lock_with_timeout(
+            lock=self._lock,
+            label=label,
+            timeout=actual_lock_timeout,
+        ):
+            return self._sftp_unsafe().get(
+                remote,
+                local,
+                prefetch=self.sftp_get_prefetch,
+                max_concurrent_prefetch_requests=self.sftp_get_max_requests,
+            )

-    def
-        self, *args, lock_timeout: Optional[float] = None, **kwargs
+    def _run(
+        self, *args, label: str, lock_timeout: Optional[float] = None, **kwargs
     ) -> Any:
         actual_lock_timeout = self.default_lock_timeout
         if lock_timeout is not None:
             actual_lock_timeout = lock_timeout
-        with
+        with _acquire_lock_with_timeout(
+            lock=self._lock,
+            label=label,
+            timeout=actual_lock_timeout,
+        ):
             return self._connection.run(*args, **kwargs)

-    def
+    def _sftp_unsafe(self) -> paramiko.sftp_client.SFTPClient:
+        """
+        This is marked as unsafe because you should only use its methods
+        after acquiring a lock.
+        """
         return self._connection.sftp()

+    def read_remote_json_file(self, filepath: str) -> dict[str, Any]:
+        self.logger.info(f"START reading remote JSON file {filepath}.")
+        with _acquire_lock_with_timeout(
+            lock=self._lock,
+            label="read_remote_json_file",
+            timeout=self.default_lock_timeout,
+        ):
+            with self._sftp_unsafe().open(filepath, "r") as f:
+                data = json.load(f)
+        self.logger.info(f"END reading remote JSON file {filepath}.")
+        return data
+
     def check_connection(self) -> None:
         """
         Open the SSH connection and handle exceptions.
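Note: the new `read_remote_json_file` holds the connection lock while reading JSON through SFTP. A rough plain-paramiko equivalent (the function name and the way the client is obtained are assumptions, not fractal-server API):

    import json
    import paramiko

    def read_remote_json_file(sftp: paramiko.SFTPClient, filepath: str) -> dict:
        # SFTPClient.open returns a file-like object, so it can be
        # handed directly to json.load.
        with sftp.open(filepath, "r") as f:
            return json.load(f)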
@@ -131,7 +202,12 @@ class FractalSSH(object):
         """
         if not self._connection.is_connected:
             try:
-
+                with _acquire_lock_with_timeout(
+                    lock=self._lock,
+                    label="_connection.open",
+                    timeout=self.default_lock_timeout,
+                ):
+                    self._connection.open()
             except Exception as e:
                 raise RuntimeError(
                     f"Cannot open SSH connection. Original error:\n{str(e)}"
@@ -146,8 +222,12 @@ class FractalSSH(object):
         because we observed cases where `is_connected=False` but the underlying
         `Transport` object was not closed.
         """
-
-
+        with _acquire_lock_with_timeout(
+            lock=self._lock,
+            label="_connection.close",
+            timeout=self.default_lock_timeout,
+        ):
+            self._connection.close()

         if self._connection.client is not None:
             self._connection.client.close()
@@ -197,8 +277,11 @@ class FractalSSH(object):
         self.logger.info(f"{prefix} START running '{cmd}' over SSH.")
         try:
             # Case 1: Command runs successfully
-            res = self.
-                cmd,
+            res = self._run(
+                cmd,
+                label=f"run {cmd}",
+                lock_timeout=actual_lock_timeout,
+                hide=True,
             )
             t_1 = time.perf_counter()
             self.logger.info(
@@ -250,7 +333,6 @@ class FractalSSH(object):
         *,
         local: str,
         remote: str,
-        logger_name: Optional[str] = None,
         lock_timeout: Optional[float] = None,
     ) -> None:
         """
@@ -261,28 +343,65 @@ class FractalSSH(object):
             remote: Target path on remote host
             fractal_ssh: FractalSSH connection object with custom lock
             logger_name: Name of the logger
-
         """
         try:
-
+            prefix = "[send_file]"
+            self.logger.info(f"{prefix} START transfer of '{local}' over SSH.")
+            self._put(
+                local=local,
+                remote=remote,
+                lock_timeout=lock_timeout,
+                label=f"send_file {local=} {remote=}",
+            )
+            self.logger.info(f"{prefix} END transfer of '{local}' over SSH.")
         except Exception as e:
-            logger
-            logger.error(
+            self.logger.error(
                 f"Transferring {local=} to {remote=} over SSH failed.\n"
                 f"Original Error:\n{str(e)}."
             )
             raise e

+    def fetch_file(
+        self,
+        *,
+        local: str,
+        remote: str,
+        lock_timeout: Optional[float] = None,
+    ) -> None:
+        """
+        Transfer a file via SSH
+
+        Args:
+            local: Local path to file
+            remote: Target path on remote host
+            logger_name: Name of the logger
+            lock_timeout:
+        """
+        try:
+            prefix = "[fetch_file] "
+            self.logger.info(f"{prefix} START fetching '{remote}' over SSH.")
+            self._get(
+                local=local,
+                remote=remote,
+                lock_timeout=lock_timeout,
+                label=f"fetch_file {local=} {remote=}",
+            )
+            self.logger.info(f"{prefix} END fetching '{remote}' over SSH.")
+        except Exception as e:
+            self.logger.error(
+                f"Transferring {remote=} to {local=} over SSH failed.\n"
+                f"Original Error:\n{str(e)}."
+            )
+            raise e
+
     def mkdir(self, *, folder: str, parents: bool = True) -> None:
         """
         Create a folder remotely via SSH.

         Args:
             folder:
-            fractal_ssh:
             parents:
         """
-        # FIXME SSH: try using `mkdir` method of `paramiko.SFTPClient`
         if parents:
             cmd = f"mkdir -p {folder}"
         else:
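Note: `send_file` and `fetch_file` are now symmetric public wrappers around the lock-guarded `_put`/`_get`. Expected usage, mirroring the call sites in executor.py above (the paths are illustrative):

    # Upload the task archive, then later download the results archive;
    # `fractal_ssh` is a FractalSSH instance.
    fractal_ssh.send_file(
        local="/tmp/subfolder.tar.gz",
        remote="/remote/workflow/subfolder.tar.gz",
    )
    fractal_ssh.fetch_file(
        remote="/remote/workflow/subfolder.tar.gz",
        local="/tmp/subfolder.tar.gz",
    )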
@@ -339,12 +458,18 @@ class FractalSSH(object):
             contents: File contents
             lock_timeout:
         """
+        self.logger.info(f"START writing to remote file {path}.")
         actual_lock_timeout = self.default_lock_timeout
         if lock_timeout is not None:
             actual_lock_timeout = lock_timeout
-        with
-
+        with _acquire_lock_with_timeout(
+            lock=self._lock,
+            label=f"write_remote_file {path=}",
+            timeout=actual_lock_timeout,
+        ):
+            with self._sftp_unsafe().open(filename=path, mode="w") as f:
                 f.write(content)
+        self.logger.info(f"END writing to remote file {path}.")


 class FractalSSHList(object):
@@ -425,7 +550,11 @@ class FractalSSHList(object):
                 "look_for_keys": False,
             },
         )
-        with
+        with _acquire_lock_with_timeout(
+            lock=self._lock,
+            label="FractalSSHList.get",
+            timeout=self._timeout,
+        ):
             self._data[key] = FractalSSH(connection=connection)
             return self._data[key]
@@ -465,7 +594,11 @@ class FractalSSHList(object):
             key_path:
         """
         key = (host, user, key_path)
-        with
+        with _acquire_lock_with_timeout(
+            lock=self._lock,
+            timeout=self._timeout,
+            label="FractalSSHList.remove",
+        ):
             self.logger.info(
                 f"Removing FractalSSH object for {user}@{host} "
                 "from collection."
@@ -492,24 +625,4 @@ class FractalSSHList(object):
                 f"Closing FractalSSH object for {user}@{host} "
                 f"({fractal_ssh_obj.is_connected=})."
             )
-
-            fractal_ssh_obj.close()
-
-    @contextmanager
-    def acquire_lock_with_timeout(self) -> Generator[Literal[True], Any, None]:
-        self.logger.debug(
-            f"Trying to acquire lock, with timeout {self._timeout} s"
-        )
-        result = self._lock.acquire(timeout=self._timeout)
-        try:
-            if not result:
-                self.logger.error("Lock was *NOT* acquired.")
-                raise FractalSSHListTimeoutError(
-                    f"Failed to acquire lock within {self._timeout} ss"
-                )
-            self.logger.debug("Lock was acquired.")
-            yield result
-        finally:
-            if result:
-                self._lock.release()
-                self.logger.debug("Lock was released")
+            fractal_ssh_obj.close()
fractal_server/tasks/v2/background_operations_ssh.py
CHANGED
@@ -1,4 +1,3 @@
-import json
 import os
 from pathlib import Path
 from tempfile import TemporaryDirectory
@@ -144,6 +143,13 @@ def background_collect_pip_ssh(
     for key, value in task_group.model_dump().items():
         logger.debug(f"task_group.{key}: {value}")

+    # `remove_venv_folder_upon_failure` is set to True only if
+    # script 1 goes through, which means that the remote folder
+    # `package_env_dir` did not already exist. If this remote
+    # folder already existed, then script 1 fails and the boolean
+    # flag `remove_venv_folder_upon_failure` remains false.
+    remove_venv_folder_upon_failure = False
+
     # Open a DB session soon, since it is needed for updating `state`
     with next(get_sync_db()) as db:
         try:
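Note: initializing `remove_venv_folder_upon_failure = False` before any remote work is the usual cleanup-flag pattern: the flag only flips to True once the folder is known to be freshly created, so error handling never deletes a pre-existing directory. A generic, self-contained sketch (all names below are illustrative, not fractal-server code):

    import shutil
    from pathlib import Path

    def create_venv_folder(path: str) -> None:
        # Stand-in for script 1: fails if the folder already exists.
        Path(path).mkdir(parents=False, exist_ok=False)

    def collect_package(package_env_dir: str) -> None:
        remove_venv_folder_upon_failure = False
        try:
            create_venv_folder(package_env_dir)
            # From here on the folder is known to be freshly created,
            # so it is safe to remove it during error handling.
            remove_venv_folder_upon_failure = True
            raise RuntimeError("simulated failure in a later step")
        except Exception:
            if remove_venv_folder_upon_failure:
                shutil.rmtree(package_env_dir, ignore_errors=True)
            raise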
@@ -187,12 +193,11 @@ def background_collect_pip_ssh(
             # long operations that do not use the db
             db.close()

-            #
-            #
-            # `
-
-
-            remove_venv_folder_upon_failure = False
+            # Create remote folder (note that because of `parents=True` we
+            # are in the `no error if existing, make parent directories as
+            # needed` scenario)
+            fractal_ssh.mkdir(folder=tasks_base_dir, parents=True)
+
             stdout = _customize_and_run_template(
                 script_filename="_1_create_venv.sh",
                 **common_args,
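Note: with `parents=True`, `FractalSSH.mkdir` runs `mkdir -p`, which fails neither on an existing folder nor on missing parent directories. The local pathlib equivalent of that behavior:

    from pathlib import Path

    # `mkdir -p a/b/c` corresponds to parents=True plus exist_ok=True.
    Path("/tmp/a/b/c").mkdir(parents=True, exist_ok=True)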
@@ -263,8 +268,9 @@ def background_collect_pip_ssh(
             ).as_posix()

             # Read and validate remote manifest file
-
-
+            pkg_manifest_dict = fractal_ssh.read_remote_json_file(
+                manifest_path_remote
+            )
             logger.info(f"collecting - loaded {manifest_path_remote=}")
             pkg_manifest = ManifestV2(**pkg_manifest_dict)
             logger.info("collecting - manifest is a valid ManifestV2")
{fractal_server-2.7.0a9.dist-info → fractal_server-2.7.0a11.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-fractal_server/__init__.py,sha256=
+fractal_server/__init__.py,sha256=U6Vt70hce3I5-D3BcN_SgVx3kKrnUis8AS0m58E-7IY,25
 fractal_server/__main__.py,sha256=dEkCfzLLQrIlxsGC-HBfoR-RBMWnJDgNrxYTyzmE9c0,6146
 fractal_server/alembic.ini,sha256=MWwi7GzjzawI9cCAK1LW7NxIBQDUqD12-ptJoq5JpP0,3153
 fractal_server/app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -55,7 +55,7 @@ fractal_server/app/routes/api/v2/task_collection.py,sha256=gCxOwigT_tfs8lCDNoE7n
 fractal_server/app/routes/api/v2/task_collection_custom.py,sha256=9T0U_4gqrQbJCy6uFDCMSZ-b1sfNIzyz_qm4P41W2Gs,6133
 fractal_server/app/routes/api/v2/task_group.py,sha256=P32EUYbtGThexSWe5zI9WUFrgoOMof035fJBILTNnfQ,5580
 fractal_server/app/routes/api/v2/workflow.py,sha256=PyvkrUHHzFGUGZE5X0VW5u3DPQA7wtXXNcEpG7-N66I,8687
-fractal_server/app/routes/api/v2/workflow_import.py,sha256=
+fractal_server/app/routes/api/v2/workflow_import.py,sha256=rD26vZ-ztjehvglrERixTeHtXuzepAtgAuPiKRNz84Q,10981
 fractal_server/app/routes/api/v2/workflowtask.py,sha256=ciHTwXXFiFnMF7ZpJ3Xs0q6YfuZrFvIjqndlzAEdZpo,6969
 fractal_server/app/routes/auth/__init__.py,sha256=fao6CS0WiAjHDTvBzgBVV_bSXFpEAeDBF6Z6q7rRkPc,1658
 fractal_server/app/routes/auth/_aux_auth.py,sha256=ifkNocTYatBSMYGwiR14qohmvR9SfMldceiEj6uJBrU,4783
@@ -84,7 +84,7 @@ fractal_server/app/runner/executors/slurm/remote.py,sha256=wLziIsGdSMiO-jIXM8x77
 fractal_server/app/runner/executors/slurm/ssh/__init__.py,sha256=Cjn1rYvljddi96tAwS-qqGkNfOcfPzjChdaEZEObCcM,65
 fractal_server/app/runner/executors/slurm/ssh/_executor_wait_thread.py,sha256=bKo5Ja0IGxJWpPWyh9dN0AG-PwzTDZzD5LyaEHB3YU4,3742
 fractal_server/app/runner/executors/slurm/ssh/_slurm_job.py,sha256=rwlqZzoGo4SAb4nSlFjsQJdaCgfM1J6YGcjb8yYxlqc,4506
-fractal_server/app/runner/executors/slurm/ssh/executor.py,sha256=
+fractal_server/app/runner/executors/slurm/ssh/executor.py,sha256=si_RHAMnXwQorQ_gWeZ_hQ_cNQbbAuYPjg7nwFQoPVg,58709
 fractal_server/app/runner/executors/slurm/sudo/__init__.py,sha256=Cjn1rYvljddi96tAwS-qqGkNfOcfPzjChdaEZEObCcM,65
 fractal_server/app/runner/executors/slurm/sudo/_check_jobs_status.py,sha256=wAgwpVcr6JIslKHOuS0FhRa_6T1KCManyRJqA-fifzw,1909
 fractal_server/app/runner/executors/slurm/sudo/_executor_wait_thread.py,sha256=z5LlhaiqAb8pHsF1WwdzXN39C5anQmwjo1rSQgtRAYE,4422
@@ -197,7 +197,7 @@ fractal_server/migrations/versions/efa89c30e0a4_add_project_timestamp_created.py
 fractal_server/migrations/versions/f384e1c0cf5d_drop_task_default_args_columns.py,sha256=9BwqUS9Gf7UW_KjrzHbtViC880qhD452KAytkHWWZyk,746
 fractal_server/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fractal_server/ssh/__init__.py,sha256=sVUmzxf7_DuXG1xoLQ1_00fo5NPhi2LJipSmU5EAkPs,124
-fractal_server/ssh/_fabric.py,sha256=
+fractal_server/ssh/_fabric.py,sha256=Pha-gRVUImj1cMsxulrJzaQa6Z60CmMYRAS4o22FcP0,19506
 fractal_server/string_tools.py,sha256=Z4qcleqXSG6RCG4hqS1emm0U-Bvv0sgTm_T87ZdYn7M,2395
 fractal_server/syringe.py,sha256=3qSMW3YaMKKnLdgnooAINOPxnCOxP7y2jeAQYB21Gdo,2786
 fractal_server/tasks/__init__.py,sha256=kadmVUoIghl8s190_Tt-8f-WBqMi8u8oU4Pvw39NHE8,23
@@ -211,7 +211,7 @@ fractal_server/tasks/v1/utils.py,sha256=J9oKys-82OehBxOon5wWl3CxjVBgYWeVEEyWGVFn
 fractal_server/tasks/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fractal_server/tasks/v2/_venv_pip.py,sha256=FOD20yKfTsW7sim3h7CsB6pgp85JBhELyYvbkpaiDKA,6390
 fractal_server/tasks/v2/background_operations.py,sha256=jWmg_XkBmcXQfPKJu_eu0wtDL4sMp1QP2bIZ9QtMj_Y,15411
-fractal_server/tasks/v2/background_operations_ssh.py,sha256=
+fractal_server/tasks/v2/background_operations_ssh.py,sha256=W-w_a7FgrSc9FAVSXoVBzvXSCmH9oQkaDJx2S9hQPlc,13616
 fractal_server/tasks/v2/database_operations.py,sha256=6r56yyFPnEBrXl6ncmO6D76znzISQCFZqCYcD-Ummd4,1213
 fractal_server/tasks/v2/endpoint_operations.py,sha256=MtUoI0XWHuPSousDeH2IC2WU--AUKQVup6Q6AbHiNUA,4102
 fractal_server/tasks/v2/templates/_1_create_venv.sh,sha256=7tt-B6n8KRN-pannZ0enE6XSxyq-hKRYRGY63CvtINI,1151
@@ -223,8 +223,8 @@ fractal_server/tasks/v2/utils.py,sha256=MnY6MhcxDRo4rPuXo2tQ252eWEPZF3OlCGe-p5Mr
 fractal_server/urls.py,sha256=5o_qq7PzKKbwq12NHSQZDmDitn5RAOeQ4xufu-2v9Zk,448
 fractal_server/utils.py,sha256=jrlCBPmC7F0ptBVcDac-EbZNsdYTLbHfX9oxkXthS5Q,2193
 fractal_server/zip_tools.py,sha256=xYpzBshysD2nmxkD5WLYqMzPYUcCRM3kYy-7n9bJL-U,4426
-fractal_server-2.7.
-fractal_server-2.7.
-fractal_server-2.7.
-fractal_server-2.7.
-fractal_server-2.7.
+fractal_server-2.7.0a11.dist-info/LICENSE,sha256=QKAharUuhxL58kSoLizKJeZE3mTCBnX6ucmz8W0lxlk,1576
+fractal_server-2.7.0a11.dist-info/METADATA,sha256=7vwgLiEeN-_a0vB2gnMmsPX_WMaa9BH3azyswEIT128,4631
+fractal_server-2.7.0a11.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+fractal_server-2.7.0a11.dist-info/entry_points.txt,sha256=8tV2kynvFkjnhbtDnxAqImL6HMVKsopgGfew0DOp5UY,58
+fractal_server-2.7.0a11.dist-info/RECORD,,
{fractal_server-2.7.0a9.dist-info → fractal_server-2.7.0a11.dist-info}/LICENSE
File without changes

{fractal_server-2.7.0a9.dist-info → fractal_server-2.7.0a11.dist-info}/WHEEL
File without changes

{fractal_server-2.7.0a9.dist-info → fractal_server-2.7.0a11.dist-info}/entry_points.txt
File without changes