sl-shared-assets 2.0.1__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of sl-shared-assets might be problematic.
- sl_shared_assets/__init__.py +17 -9
- sl_shared_assets/__init__.pyi +12 -8
- sl_shared_assets/cli.py +258 -21
- sl_shared_assets/cli.pyi +44 -5
- sl_shared_assets/data_classes/__init__.py +8 -3
- sl_shared_assets/data_classes/__init__.pyi +8 -4
- sl_shared_assets/data_classes/configuration_data.py +149 -30
- sl_shared_assets/data_classes/configuration_data.pyi +49 -11
- sl_shared_assets/data_classes/runtime_data.py +70 -49
- sl_shared_assets/data_classes/runtime_data.pyi +41 -33
- sl_shared_assets/data_classes/session_data.py +193 -253
- sl_shared_assets/data_classes/session_data.pyi +99 -116
- sl_shared_assets/data_classes/surgery_data.py +1 -1
- sl_shared_assets/server/__init__.py +2 -2
- sl_shared_assets/server/__init__.pyi +5 -2
- sl_shared_assets/server/job.py +229 -1
- sl_shared_assets/server/job.pyi +111 -0
- sl_shared_assets/server/server.py +431 -31
- sl_shared_assets/server/server.pyi +158 -15
- sl_shared_assets/tools/__init__.py +2 -1
- sl_shared_assets/tools/__init__.pyi +2 -0
- sl_shared_assets/tools/ascension_tools.py +9 -21
- sl_shared_assets/tools/ascension_tools.pyi +1 -1
- sl_shared_assets/tools/packaging_tools.py +2 -2
- sl_shared_assets/tools/project_management_tools.py +147 -41
- sl_shared_assets/tools/project_management_tools.pyi +45 -6
- {sl_shared_assets-2.0.1.dist-info → sl_shared_assets-3.0.0.dist-info}/METADATA +127 -13
- sl_shared_assets-3.0.0.dist-info/RECORD +36 -0
- {sl_shared_assets-2.0.1.dist-info → sl_shared_assets-3.0.0.dist-info}/entry_points.txt +2 -0
- sl_shared_assets-2.0.1.dist-info/RECORD +0 -36
- {sl_shared_assets-2.0.1.dist-info → sl_shared_assets-3.0.0.dist-info}/WHEEL +0 -0
- {sl_shared_assets-2.0.1.dist-info → sl_shared_assets-3.0.0.dist-info}/licenses/LICENSE +0 -0
sl_shared_assets/server/server.py

@@ -4,20 +4,23 @@ the running job status. All lab processing and analysis pipelines use this inter
resources.
"""

-import
+import stat
+from random import randint
from pathlib import Path
import tempfile
-from dataclasses import dataclass
+from dataclasses import field, dataclass

import paramiko

# noinspection PyProtectedMember
from simple_slurm import Slurm  # type: ignore
+from ataraxis_time import PrecisionTimer
from paramiko.client import SSHClient
from ataraxis_base_utilities import LogLevel, console
from ataraxis_data_structures import YamlConfig
+from ataraxis_time.time_helpers import get_timestamp

-from .job import Job
+from .job import Job, JupyterJob


def generate_server_credentials(
@@ -25,30 +28,36 @@ def generate_server_credentials(
    username: str,
    password: str,
    host: str = "cbsuwsun.biohpc.cornell.edu",
-
-
+    storage_root: str = "/local/workdir",
+    working_root: str = "/local/storage",
+    shared_directory_name: str = "sun_data",
) -> None:
    """Generates a new server_credentials.yaml file under the specified directory, using input information.

    This function provides a convenience interface for generating new BioHPC server credential files. Generally, this is
-    only used when setting up new host-computers in the lab.
+    only used when setting up new host-computers or users in the lab.

    Args:
        output_directory: The directory where to save the generated server_credentials.yaml file.
        username: The username to use for server authentication.
        password: The password to use for server authentication.
        host: The hostname or IP address of the server to connect to.
-
-
-
-
+        storage_root: The path to the root storage (slow) server directory. Typically, this is the path to the
+            top-level (root) directory of the HDD RAID volume.
+        working_root: The path to the root working (fast) server directory. Typically, this is the path to the
+            top-level (root) directory of the NVME RAID volume. If the server uses the same volume for both storage and
+            working directories, enter the same path under both 'storage_root' and 'working_root'.
+        shared_directory_name: The name of the shared directory used to store all Sun lab project data on the storage
+            and working server volumes.
    """
+    # noinspection PyArgumentList
    ServerCredentials(
        username=username,
        password=password,
        host=host,
-
-
+        storage_root=storage_root,
+        working_root=working_root,
+        shared_directory_name=shared_directory_name,
    ).to_yaml(file_path=output_directory.joinpath("server_credentials.yaml"))


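For orientation, a minimal sketch of calling the updated helper. The output path and account values are hypothetical, the omitted storage_root/working_root arguments fall back to the defaults shown above, and the import assumes the function is exported from the package root:

    from pathlib import Path
    from sl_shared_assets import generate_server_credentials

    generate_server_credentials(
        output_directory=Path("/home/labuser/.sl-assets"),  # hypothetical path
        username="jd233",                                   # hypothetical NetID
        password="not-a-real-password",
    )
    # Writes /home/labuser/.sl-assets/server_credentials.yaml
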
@@ -68,11 +77,37 @@ class ServerCredentials(YamlConfig):
    """The password to use for server authentication."""
    host: str = "cbsuwsun.biohpc.cornell.edu"
    """The hostname or IP address of the server to connect to."""
-
+    storage_root: str = "/local/storage"
+    """The path to the root storage (slow) server directory. Typically, this is the path to the top-level (root)
+    directory of the HDD RAID volume."""
+    working_root: str = "/local/workdir"
+    """The path to the root working (fast) server directory. Typically, this is the path to the top-level (root)
+    directory of the NVME RAID volume. If the server uses the same volume for both storage and working directories,
+    enter the same path under both 'storage_root' and 'working_root'."""
+    shared_directory_name: str = "sun_data"
+    """Stores the name of the shared directory used to store all Sun lab project data on the storage and working
+    server volumes."""
+    raw_data_root: str = field(init=False, default_factory=lambda: "/local/storage/sun_data")
    """The path to the root directory used to store the raw data from all Sun lab projects on the target server."""
-    processed_data_root: str = "/
+    processed_data_root: str = field(init=False, default_factory=lambda: "/local/workdir/sun_data")
    """The path to the root directory used to store the processed data from all Sun lab projects on the target
    server."""
+    user_data_root: str = field(init=False, default_factory=lambda: "/local/storage/YourNetID")
+    """The path to the root directory of the user on the target server. Unlike raw and processed data roots, which are
+    shared between all Sun lab users, each user_data directory is unique for every server user."""
+    user_working_root: str = field(init=False, default_factory=lambda: "/local/workdir/YourNetID")
+    """The path to the root user working directory on the target server. This directory is unique for every user."""
+
+    def __post_init__(self) -> None:
+        """Statically resolves the paths to end-point directories using provided root directories."""
+
+        # Shared Sun Lab directories statically use 'sun_data' root names
+        self.raw_data_root = str(Path(self.storage_root).joinpath(self.shared_directory_name))
+        self.processed_data_root = str(Path(self.working_root).joinpath(self.shared_directory_name))
+
+        # User directories exist at the same level as the 'shared' root project directories, but use user-ids as names.
+        self.user_data_root = str(Path(self.storage_root).joinpath(f"{self.username}"))
+        self.user_working_root = str(Path(self.working_root).joinpath(f"{self.username}"))


class Server:
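Because the *_root endpoint fields are declared with field(init=False), __post_init__ always overwrites their YourNetID placeholders at construction time. A quick sketch of the resolved values for a hypothetical user 'jd233' left on the default roots:

    creds = ServerCredentials(username="jd233", password="not-a-real-password")
    assert creds.raw_data_root == "/local/storage/sun_data"
    assert creds.processed_data_root == "/local/workdir/sun_data"
    assert creds.user_data_root == "/local/storage/jd233"
    assert creds.user_working_root == "/local/workdir/jd233"
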
@@ -105,6 +140,9 @@ class Server:
        # Loads the credentials from the provided .yaml file
        self._credentials: ServerCredentials = ServerCredentials.from_yaml(credentials_path)  # type: ignore

+        # Initializes a timer class to optionally delay loop cycling below
+        timer = PrecisionTimer("s")
+
        # Establishes the SSH connection to the specified processing server. At most, attempts to connect to the server
        # 30 times before terminating with an error
        attempt = 0
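The timer instantiated here drives the connection-retry loop shown in the next hunk. Isolated as a standalone sketch (connect_with_retries is illustrative, not part of the package; PrecisionTimer.delay_noblock is the same ataraxis_time call the diff itself uses):

    import paramiko
    from ataraxis_time import PrecisionTimer

    def connect_with_retries(host: str, user: str, password: str, max_attempts: int = 30) -> paramiko.SSHClient:
        # Mirrors the Server.__init__ pattern: fixed 2-second backoff between attempts.
        client = paramiko.SSHClient()
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        timer = PrecisionTimer("s")
        for _ in range(max_attempts):
            try:
                client.connect(hostname=host, username=user, password=password)
                return client
            except Exception:
                timer.delay_noblock(delay=2, allow_sleep=True)
        raise RuntimeError(f"Could not SSH into {host} after {max_attempts} attempts.")
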
@@ -135,17 +173,140 @@ class Server:
                raise RuntimeError

            console.echo(
-                f"Could not SSH
+                f"Could not SSH into {self._credentials.host}, retrying after a 2-second delay...",
                level=LogLevel.WARNING,
            )
            attempt += 1
-
+            timer.delay_noblock(delay=2, allow_sleep=True)

    def __del__(self) -> None:
        """If the instance is connected to the server, terminates the connection before the instance is destroyed."""
        self.close()

-    def
+    def create_job(
+        self,
+        job_name: str,
+        conda_environment: str,
+        cpus_to_use: int = 10,
+        ram_gb: int = 10,
+        time_limit: int = 60,
+    ) -> Job:
+        """Creates and returns a new Job instance.
+
+        Use this method to generate Job objects for all headless jobs that need to be run on the remote server. The
+        generated Job is a precursor that requires further configuration by the user before it can be submitted to the
+        server for execution.
+
+        Args:
+            job_name: The descriptive name of the SLURM job to be created. Primarily, this name is used in terminal
+                printouts to identify the job to human operators.
+            conda_environment: The name of the conda environment to activate on the server before running the job
+                logic. The environment should contain the necessary Python packages and CLIs to support running the
+                job's logic.
+            cpus_to_use: The number of CPUs to use for the job.
+            ram_gb: The amount of RAM to allocate for the job, in Gigabytes.
+            time_limit: The maximum time limit for the job, in minutes. If the job is still running at the end of this
+                time period, it will be forcibly terminated. It is highly advised to always set adequate maximum
+                runtime limits to prevent jobs from hogging the server in case of runtime or algorithm errors.
+
+        Returns:
+            The initialized Job instance pre-filled with SLURM configuration data and conda activation commands. Modify
+            the returned instance with any additional commands as necessary for the job to fulfill its intended
+            purpose. Note that the Job must be submitted via submit_job() before the server executes it.
+        """
+        # Statically configures the working directory to be stored under:
+        # user working root / job_logs / job_name_timestamp
+        timestamp = get_timestamp()
+        working_directory = Path(self.user_working_root.joinpath("job_logs", f"{job_name}_{timestamp}"))
+        self.create_directory(remote_path=working_directory, parents=True)
+
+        return Job(
+            job_name=job_name,
+            output_log=working_directory.joinpath("stdout.txt"),
+            error_log=working_directory.joinpath("stderr.txt"),
+            working_directory=working_directory,
+            conda_environment=conda_environment,
+            cpus_to_use=cpus_to_use,
+            ram_gb=ram_gb,
+            time_limit=time_limit,
+        )
+
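A minimal end-to-end sketch of the headless-job flow. The credentials path and environment name are hypothetical, the constructor is assumed to keep its credentials_path argument, and the mechanism for attaching commands to the Job lives in job.py, outside this hunk:

    from pathlib import Path

    server = Server(credentials_path=Path("server_credentials.yaml"))  # hypothetical file location
    job = server.create_job(job_name="suite2p_pass", conda_environment="processing", cpus_to_use=16, ram_gb=64)
    # ...extend `job` with the commands it should run (see sl_shared_assets/server/job.py)...
    job = server.submit_job(job)
    while not server.job_complete(job):
        pass  # poll, ideally with a delay, until SLURM reports completion
    server.close()
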
+    def launch_jupyter_server(
+        self,
+        job_name: str,
+        conda_environment: str,
+        notebook_directory: Path,
+        cpus_to_use: int = 2,
+        ram_gb: int = 32,
+        time_limit: int = 240,
+        port: int = 0,
+        jupyter_args: str = "",
+    ) -> JupyterJob:
+        """Launches a Jupyter notebook server on the target remote Sun lab server.
+
+        Use this method to run interactive Jupyter sessions on the remote server under SLURM control. Unlike
+        create_job(), this method automatically submits the job for execution as part of its runtime. Therefore, the
+        returned JupyterJob instance should only be used to query information about how to connect to the remote
+        Jupyter server.
+
+        Args:
+            job_name: The descriptive name of the Jupyter SLURM job to be created. Primarily, this name is used in
+                terminal printouts to identify the job to human operators.
+            conda_environment: The name of the conda environment to activate on the server before running the job
+                logic. The environment should contain the necessary Python packages and CLIs to support running the
+                job's logic. For Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
+            port: The connection port number for the Jupyter server. If set to 0 (default), a random port number
+                between 8888 and 9999 will be assigned to this connection to reduce the possibility of colliding with
+                other user sessions.
+            notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have GUI
+                access to items stored in or under this directory. For most runtimes, this should be set to the user's
+                root data or working directory.
+            cpus_to_use: The number of CPUs to allocate to the Jupyter server. Keep this value as small as possible to
+                avoid interfering with headless data processing jobs.
+            ram_gb: The amount of RAM, in GB, to allocate to the Jupyter server. Keep this value as small as possible
+                to avoid interfering with headless data processing jobs.
+            time_limit: The maximum Jupyter server uptime, in minutes. Set this to the expected duration of your
+                jupyter session.
+            jupyter_args: Stores any additional arguments to pass to the jupyter notebook initialization command.
+
+        Returns:
+            The initialized JupyterJob instance that stores information on how to connect to the created Jupyter
+            server. Do NOT re-submit the job to the server, as this is done as part of this method's runtime.
+
+        Raises:
+            TimeoutError: If the target Jupyter server doesn't start within 120 seconds of this method being called.
+            RuntimeError: If job submission fails for any reason.
+        """
+
+        # Statically configures the working directory to be stored under:
+        # user working root / job_logs / job_name_timestamp
+        timestamp = get_timestamp()
+        working_directory = Path(self.user_working_root.joinpath("job_logs", f"{job_name}_{timestamp}"))
+        self.create_directory(remote_path=working_directory, parents=True)
+
+        # If necessary, generates and sets port to a random value between 8888 and 9999.
+        if port == 0:
+            port = randint(8888, 9999)
+
+        job = JupyterJob(
+            job_name=job_name,
+            output_log=working_directory.joinpath("stdout.txt"),
+            error_log=working_directory.joinpath("stderr.txt"),
+            working_directory=working_directory,
+            conda_environment=conda_environment,
+            notebook_directory=notebook_directory,
+            port=port,
+            cpus_to_use=cpus_to_use,
+            ram_gb=ram_gb,
+            time_limit=time_limit,
+            jupyter_args=jupyter_args,
+        )
+
+        # Submits the job to the server and, if submission is successful, returns the JupyterJob object extended to
+        # include connection data received from the server.
+        return self.submit_job(job)  # type: ignore[return-value]
+
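A matching sketch for the interactive path. Unlike create_job(), launch_jupyter_server() submits the job itself, so the returned JupyterJob is only queried for connection details (their exact attribute names live in job.py):

    jupyter = server.launch_jupyter_server(
        job_name="exploration",
        conda_environment="analysis",              # must provide the jupyter notebook / jupyterlab packages
        notebook_directory=server.user_data_root,  # Jupyter's GUI root on the server
        time_limit=240,                            # minutes; match the expected session length
    )
    # Do NOT call submit_job(jupyter) again; submission already happened inside the method.
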
+    def submit_job(self, job: Job | JupyterJob) -> Job | JupyterJob:
        """Submits the input job to the managed BioHPC server via SLURM job manager.

        This method submits various jobs for execution via SLURM-managed BioHPC cluster. As part of its runtime, the
@@ -162,6 +323,7 @@ class Server:
        Raises:
            RuntimeError: If job submission to the server fails.
        """
+        console.echo(message=f"Submitting '{job.job_name}' job to the remote server {self.host}...")

        # Generates a temporary shell script on the local machine. Uses tempfile to automatically remove the
        # local script as soon as it is uploaded to the server.
@@ -197,9 +359,62 @@ class Server:
        # Job object
        job_id = job_output.split()[-1]
        job.job_id = job_id
+
+        # Special processing for Jupyter jobs
+        if isinstance(job, JupyterJob):
+            # Transfers host and user information to the JupyterJob object
+            job.host = self.host
+            job.user = self.user
+
+            # Initializes a timer class to optionally delay loop cycling below
+            timer = PrecisionTimer("s")
+
+            timer.reset()
+            while timer.elapsed < 120:  # Waits for at most 2 minutes before terminating with an error
+                # Checks if the connection info file exists
+                try:
+                    # Pulls the connection info file
+                    local_info_file = Path(f"/tmp/{job.job_name}_connection.txt")
+                    self.pull_file(local_file_path=local_info_file, remote_file_path=job.connection_info_file)
+
+                    # Parses connection data from the file and caches it inside Job class attributes
+                    job.parse_connection_info(local_info_file)
+
+                    # Removes the local file copy after it is parsed
+                    local_info_file.unlink(missing_ok=True)
+
+                    # Also removes the remote copy once the runtime is over
+                    self.remove(remote_path=job.connection_info_file, is_dir=False)
+
+                    # Breaks the waiting loop
+                    break
+
+                except Exception:
+                    # The file doesn't exist yet or job initialization failed
+                    if self.job_complete(job):
+                        message = (
+                            f"Remote jupyter server job {job.job_name} with id {job.job_id} encountered a startup "
+                            f"error and was terminated prematurely."
+                        )
+                        console.error(message, RuntimeError)
+
+                    timer.delay_noblock(delay=5, allow_sleep=True)  # Waits for 5 seconds before checking again
+            else:
+                # Only raises a timeout error if the while loop is not broken in 120 seconds
+                message = (
+                    f"Remote jupyter server job {job.job_name} with id {job.job_id} did not start within 120 seconds "
+                    f"of being submitted. Since all jupyter jobs are intended to be interactive and the server is "
+                    f"busy running other jobs, this job is cancelled. Try again when the server is less busy."
+                )
+                console.error(message, TimeoutError)
+                raise TimeoutError(message)  # Fallback to appease mypy
+
+        console.echo(message=f"{job.job_name} job: Submitted to {self.host}.", level=LogLevel.SUCCESS)
+
+        # Returns the updated job object
        return job

-    def job_complete(self, job: Job) -> bool:
+    def job_complete(self, job: Job | JupyterJob) -> bool:
        """Returns True if the job managed by the input Job instance has been completed or terminated its runtime due
        to an error.

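The startup-polling block above relies on Python's while/else semantics: the else clause runs only when the loop condition goes false without a break. A standalone distillation of the pattern (wait_for and its arguments are illustrative):

    from time import monotonic, sleep
    from typing import Callable

    def wait_for(predicate: Callable[[], bool], timeout_s: float = 120.0, poll_s: float = 5.0) -> None:
        start = monotonic()
        while monotonic() - start < timeout_s:
            if predicate():
                break  # success path: the else clause below is skipped
            sleep(poll_s)
        else:
            raise TimeoutError(f"Condition not met within {timeout_s} seconds.")
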
@@ -228,6 +443,24 @@ class Server:
        else:
            return False

+    def abort_job(self, job: Job | JupyterJob) -> None:
+        """Aborts the target job if it is currently running on the server.
+
+        Use this method to immediately abort running or queued jobs, without waiting for the timeout guard. If the job
+        is queued, this method will remove it from the SLURM queue. If the job is already terminated, this method will
+        do nothing.
+
+        Args:
+            job: The Job object that needs to be aborted.
+        """
+
+        # Sends the 'scancel' command to the server targeting the specific Job via ID, unless the job is already
+        # complete
+        if not self.job_complete(job):
+            self._client.exec_command(f"scancel {job.job_id}")
+
+        console.echo(message=f"{job.job_name} job: Aborted.", level=LogLevel.SUCCESS)
+
    def pull_file(self, local_file_path: Path, remote_file_path: Path) -> None:
        """Moves the specified file from the remote server to the local machine.

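Usage is a one-liner, and it is safe to call unconditionally because the method no-ops on jobs that job_complete() already reports as finished:

    server.abort_job(job)  # issues `scancel <job_id>` only if the job is still queued or running
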
@@ -236,8 +469,10 @@ class Server:
            remote_file_path: The path to the target file on the remote server (the file to be copied).
        """
        sftp = self._client.open_sftp()
-
-
+        try:
+            sftp.get(localpath=local_file_path, remotepath=str(remote_file_path))
+        finally:
+            sftp.close()

    def push_file(self, local_file_path: Path, remote_file_path: Path) -> None:
        """Moves the specified file from the local machine to the remote server.
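The try/finally guard added here could equivalently lean on SFTPClient's context-manager support (available in recent paramiko releases; worth verifying against the version this package pins). An illustrative alternative, not the package's code:

    from pathlib import Path
    import paramiko

    def pull_file_cm(client: paramiko.SSHClient, local_path: Path, remote_path: Path) -> None:
        # `with` closes the SFTP channel on every exit path, including exceptions.
        with client.open_sftp() as sftp:
            sftp.get(localpath=str(local_path), remotepath=str(remote_path))
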
@@ -247,8 +482,75 @@ class Server:
            remote_file_path: The path to the file on the remote server (where to copy the file).
        """
        sftp = self._client.open_sftp()
-
-
+        try:
+            sftp.put(localpath=local_file_path, remotepath=str(remote_file_path))
+        finally:
+            sftp.close()
+
+    def pull_directory(self, local_directory_path: Path, remote_directory_path: Path) -> None:
+        """Recursively downloads the entire target directory from the remote server to the local machine.
+
+        Args:
+            local_directory_path: The path to the local directory where the remote directory will be copied.
+            remote_directory_path: The path to the directory on the remote server to be downloaded.
+        """
+        sftp = self._client.open_sftp()
+
+        try:
+            # Creates the local directory if it doesn't exist
+            local_directory_path.mkdir(parents=True, exist_ok=True)
+
+            # Gets the list of items in the remote directory
+            remote_items = sftp.listdir_attr(str(remote_directory_path))
+
+            for item in remote_items:
+                remote_item_path = remote_directory_path.joinpath(item.filename)
+                local_item_path = local_directory_path.joinpath(item.filename)
+
+                # Checks if the item is a directory
+                if stat.S_ISDIR(item.st_mode):  # type: ignore
+                    # Recursively pulls the subdirectory
+                    self.pull_directory(local_item_path, remote_item_path)
+                else:
+                    # Pulls the individual file
+                    sftp.get(localpath=str(local_item_path), remotepath=str(remote_item_path))

+        finally:
+            sftp.close()
+
+    def push_directory(self, local_directory_path: Path, remote_directory_path: Path) -> None:
+        """Recursively uploads the entire target directory from the local machine to the remote server.
+
+        Args:
+            local_directory_path: The path to the local directory to be uploaded.
+            remote_directory_path: The path on the remote server where the directory will be copied.
+        """
+        if not local_directory_path.exists() or not local_directory_path.is_dir():
+            message = (
+                f"Unable to upload the target local directory {local_directory_path} to the server, as it does not "
+                f"exist."
+            )
+            console.error(message=message, error=FileNotFoundError)
+
+        sftp = self._client.open_sftp()
+
+        try:
+            # Creates the remote directory using the existing method
+            self.create_directory(remote_directory_path, parents=True)
+
+            # Iterates through all items in the local directory
+            for local_item_path in local_directory_path.iterdir():
+                remote_item_path = remote_directory_path.joinpath(local_item_path.name)
+
+                if local_item_path.is_dir():
+                    # Recursively pushes the subdirectory
+                    self.push_directory(local_item_path, remote_item_path)
+                else:
+                    # Pushes the individual file
+                    sftp.put(localpath=str(local_item_path), remotepath=str(remote_item_path))
+
+        finally:
+            sftp.close()

    def remove(self, remote_path: Path, is_dir: bool) -> None:
        """Removes the specified file or directory from the remote server.
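A usage sketch for the new recursive transfer helpers (the local paths are hypothetical). Note that pull_directory() creates the local destination and push_directory() creates the remote one, so neither needs to pre-exist:

    server.push_directory(
        local_directory_path=Path("./session_configs"),
        remote_directory_path=server.user_data_root.joinpath("session_configs"),
    )
    server.pull_directory(
        local_directory_path=Path("./results"),
        remote_directory_path=server.processed_data_root.joinpath("my_project"),
    )
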
@@ -258,11 +560,87 @@ class Server:
            is_dir: Determines whether the input path represents a directory or a file.
        """
        sftp = self._client.open_sftp()
-
-
-
-
-
+        try:
+            if is_dir:
+                sftp.rmdir(path=str(remote_path))
+            else:
+                sftp.unlink(path=str(remote_path))
+        finally:
+            sftp.close()
+
+    def create_directory(self, remote_path: Path, parents: bool = True) -> None:
+        """Creates the specified directory tree on the managed remote server via SFTP.
+
+        This method creates directories on the remote server, with options to create parent directories and handle
+        existing directories gracefully.
+
+        Args:
+            remote_path: The absolute path to the directory to create on the remote server, relative to the server
+                root.
+            parents: Determines whether to create parent directories, if they are missing. Otherwise, if parents do not
+                exist, raises a FileNotFoundError.
+
+        Notes:
+            This method silently assumes that it is fine if the directory already exists and treats it as a successful
+            runtime end-point.
+        """
+        sftp = self._client.open_sftp()
+
+        try:
+            # Converts the target path to string for SFTP operations
+            remote_path_str = str(remote_path)
+
+            if parents:
+                # Creates parent directories if needed:
+                # splits the path into parts and creates each level
+                path_parts = Path(remote_path_str).parts
+                current_path = ""
+
+                for part in path_parts:
+                    # Skips empty path parts
+                    if not part:
+                        continue
+
+                    if current_path:
+                        # Keeps stacking path components on top of the current_path object
+                        current_path = str(Path(current_path).joinpath(part))
+                    else:
+                        # Initially, the current path is empty, so it is set to the first part
+                        current_path = part
+
+                    try:
+                        # Checks if the directory exists by trying to stat it
+                        sftp.stat(current_path)
+                    except FileNotFoundError:
+                        # If the directory does not exist, creates it
+                        sftp.mkdir(current_path)
+            else:
+                # Otherwise, only creates the final directory
+                try:
+                    # Checks if the directory already exists
+                    sftp.stat(remote_path_str)
+                except FileNotFoundError:
+                    # Creates the directory if it does not exist
+                    sftp.mkdir(remote_path_str)
+
+        # Ensures the sftp connection is closed.
+        finally:
+            sftp.close()
+
+    def exists(self, remote_path: Path) -> bool:
+        """Returns True if the target file or directory exists on the remote server."""
+
+        sftp = self._client.open_sftp()
+        try:
+            # Checks if the target file or directory exists by trying to stat it
+            sftp.stat(str(remote_path))
+
+            # If the request does not err, returns True (file or directory exists)
+            return True
+
+        # If the directory or file does not exist, returns False
+        except FileNotFoundError:
+            return False

    def close(self) -> None:
        """Closes the SSH connection to the server.
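A sketch tying the new filesystem helpers together. Because remove() maps directly to SFTP rmdir/unlink, directory removal only succeeds on empty directories:

    scratch = server.user_working_root.joinpath("scratch", "run_01")
    server.create_directory(remote_path=scratch, parents=True)  # treats an existing directory as success
    assert server.exists(scratch)
    server.remove(remote_path=scratch, is_dir=True)             # the directory must be empty
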
@@ -274,15 +652,37 @@ class Server:
        self._client.close()

    @property
-    def raw_data_root(self) ->
+    def raw_data_root(self) -> Path:
        """Returns the absolute path to the directory used to store the raw data for all Sun lab projects on the server
        accessible through this class.
        """
-        return self._credentials.raw_data_root
+        return Path(self._credentials.raw_data_root)

    @property
-    def processed_data_root(self) ->
+    def processed_data_root(self) -> Path:
        """Returns the absolute path to the directory used to store the processed data for all Sun lab projects on the
        server accessible through this class.
        """
-        return self._credentials.processed_data_root
+        return Path(self._credentials.processed_data_root)
+
+    @property
+    def user_data_root(self) -> Path:
+        """Returns the absolute path to the directory used to store user-specific data on the server accessible through
+        this class."""
+        return Path(self._credentials.user_data_root)
+
+    @property
+    def user_working_root(self) -> Path:
+        """Returns the absolute path to the user-specific working (fast) directory on the server accessible through
+        this class."""
+        return Path(self._credentials.user_working_root)
+
+    @property
+    def host(self) -> str:
+        """Returns the hostname or IP address of the server accessible through this class."""
+        return self._credentials.host
+
+    @property
+    def user(self) -> str:
+        """Returns the username used to authenticate with the server."""
+        return self._credentials.username