sl-shared-assets 2.0.0__py3-none-any.whl → 3.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sl_shared_assets/__init__.py +9 -5
- sl_shared_assets/__init__.pyi +4 -4
- sl_shared_assets/cli.py +270 -20
- sl_shared_assets/cli.pyi +50 -5
- sl_shared_assets/data_classes/configuration_data.py +20 -0
- sl_shared_assets/data_classes/configuration_data.pyi +14 -0
- sl_shared_assets/data_classes/session_data.py +7 -11
- sl_shared_assets/data_classes/session_data.pyi +1 -2
- sl_shared_assets/server/__init__.py +2 -2
- sl_shared_assets/server/__init__.pyi +5 -2
- sl_shared_assets/server/job.py +229 -1
- sl_shared_assets/server/job.pyi +111 -0
- sl_shared_assets/server/server.py +365 -31
- sl_shared_assets/server/server.pyi +144 -15
- sl_shared_assets/tools/__init__.py +2 -1
- sl_shared_assets/tools/__init__.pyi +2 -0
- sl_shared_assets/tools/packaging_tools.py +1 -2
- sl_shared_assets/tools/project_management_tools.py +150 -34
- sl_shared_assets/tools/project_management_tools.pyi +46 -3
- {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0rc1.dist-info}/METADATA +5 -6
- sl_shared_assets-3.0.0rc1.dist-info/RECORD +36 -0
- {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0rc1.dist-info}/entry_points.txt +2 -0
- sl_shared_assets-2.0.0.dist-info/RECORD +0 -36
- {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0rc1.dist-info}/WHEEL +0 -0
- {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0rc1.dist-info}/licenses/LICENSE +0 -0
sl_shared_assets/server/server.py
@@ -4,20 +4,22 @@ the running job status. All lab processing and analysis pipelines use this inter
 resources.
 """

-import
+from random import randint
 from pathlib import Path
 import tempfile
-from dataclasses import dataclass
+from dataclasses import field, dataclass

 import paramiko

 # noinspection PyProtectedMember
 from simple_slurm import Slurm  # type: ignore
+from ataraxis_time import PrecisionTimer
 from paramiko.client import SSHClient
 from ataraxis_base_utilities import LogLevel, console
 from ataraxis_data_structures import YamlConfig
+from ataraxis_time.time_helpers import get_timestamp

-from .job import Job
+from .job import Job, JupyterJob


 def generate_server_credentials(
@@ -25,30 +27,36 @@ def generate_server_credentials(
     username: str,
     password: str,
     host: str = "cbsuwsun.biohpc.cornell.edu",
-
-
+    storage_root: str = "/local/workdir",
+    working_root: str = "/local/storage",
+    shared_directory_name: str = "sun_data",
 ) -> None:
     """Generates a new server_credentials.yaml file under the specified directory, using input information.

     This function provides a convenience interface for generating new BioHPC server credential files. Generally, this is
-    only used when setting up new host-computers in the lab.
+    only used when setting up new host-computers or users in the lab.

     Args:
         output_directory: The directory where to save the generated server_credentials.yaml file.
         username: The username to use for server authentication.
         password: The password to use for server authentication.
         host: The hostname or IP address of the server to connect to.
-
-
-
-
+        storage_root: The path to the root storage (slow) server directory. Typically, this is the path to the
+            top-level (root) directory of the HDD RAID volume.
+        working_root: The path to the root working (fast) server directory. Typically, this is the path to the
+            top-level (root) directory of the NVME RAID volume. If the server uses the same volume for both storage and
+            working directories, enter the same path under both 'storage_root' and 'working_root'.
+        shared_directory_name: The name of the shared directory used to store all Sun lab project data on the storage
+            and working server volumes.
     """
+    # noinspection PyArgumentList
     ServerCredentials(
         username=username,
         password=password,
         host=host,
-
-
+        storage_root=storage_root,
+        working_root=working_root,
+        shared_directory_name=shared_directory_name,
     ).to_yaml(file_path=output_directory.joinpath("server_credentials.yaml"))

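For orientation, this is roughly how the extended generator would be invoked after this change. The import path, output directory, and credential values below are illustrative assumptions, not part of the diff; both roots are passed explicitly because the signature defaults in this hunk appear swapped relative to the ServerCredentials class defaults in the next hunk.

    from pathlib import Path

    from sl_shared_assets.server.server import generate_server_credentials  # assumed import path

    # Writes <home>/.sl_assets/server_credentials.yaml with the new root-path fields.
    generate_server_credentials(
        output_directory=Path.home().joinpath(".sl_assets"),
        username="netid123",             # placeholder NetID
        password="not-a-real-password",  # placeholder secret
        storage_root="/local/storage",   # slow HDD RAID root
        working_root="/local/workdir",   # fast NVME RAID root
        shared_directory_name="sun_data",
    )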
@@ -68,11 +76,37 @@ class ServerCredentials(YamlConfig):
     """The password to use for server authentication."""
     host: str = "cbsuwsun.biohpc.cornell.edu"
     """The hostname or IP address of the server to connect to."""
-
+    storage_root: str = "/local/storage"
+    """The path to the root storage (slow) server directory. Typically, this is the path to the top-level (root)
+    directory of the HDD RAID volume."""
+    working_root: str = "/local/workdir"
+    """The path to the root working (fast) server directory. Typically, this is the path to the top-level (root)
+    directory of the NVME RAID volume. If the server uses the same volume for both storage and working directories,
+    enter the same path under both 'storage_root' and 'working_root'."""
+    shared_directory_name: str = "sun_data"
+    """Stores the name of the shared directory used to store all Sun lab project data on the storage and working
+    server volumes."""
+    raw_data_root: str = field(init=False, default_factory=lambda: "/local/storage/sun_data")
     """The path to the root directory used to store the raw data from all Sun lab projects on the target server."""
-    processed_data_root: str = "/
+    processed_data_root: str = field(init=False, default_factory=lambda: "/local/workdir/sun_data")
     """The path to the root directory used to store the processed data from all Sun lab projects on the target
     server."""
+    user_data_root: str = field(init=False, default_factory=lambda: "/local/storage/YourNetID")
+    """The path to the root directory of the user on the target server. Unlike raw and processed data roots, which are
+    shared between all Sun lab users, each user_data directory is unique for every server user."""
+    user_working_root: str = field(init=False, default_factory=lambda: "/local/workdir/YourNetID")
+    """The path to the root user working directory on the target server. This directory is unique for every user."""
+
+    def __post_init__(self) -> None:
+        """Statically resolves the paths to end-point directories using the provided root directories."""
+
+        # Shared Sun lab directories statically use 'sun_data' root names
+        self.raw_data_root = str(Path(self.storage_root).joinpath(self.shared_directory_name))
+        self.processed_data_root = str(Path(self.working_root).joinpath(self.shared_directory_name))
+
+        # User directories exist at the same level as the 'shared' root project directories, but use user-ids as names.
+        self.user_data_root = str(Path(self.storage_root).joinpath(f"{self.username}"))
+        self.user_working_root = str(Path(self.working_root).joinpath(f"{self.username}"))


 class Server:
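The new fields follow a standard dataclass pattern: the user-facing roots are ordinary init fields, while the derived paths are declared with field(init=False) and recomputed in __post_init__, so the lambda defaults above act only as placeholders. A minimal standalone sketch of the same pattern, with simplified names rather than the package's actual class:

    from dataclasses import dataclass, field
    from pathlib import PurePosixPath  # keeps the sketch deterministic across platforms

    @dataclass
    class Credentials:
        username: str = "YourNetID"
        storage_root: str = "/local/storage"
        shared_directory_name: str = "sun_data"
        raw_data_root: str = field(init=False, default="")
        user_data_root: str = field(init=False, default="")

        def __post_init__(self) -> None:
            # Derived paths are always recomputed from the user-supplied roots.
            self.raw_data_root = str(PurePosixPath(self.storage_root) / self.shared_directory_name)
            self.user_data_root = str(PurePosixPath(self.storage_root) / self.username)

    creds = Credentials(username="abc123")
    assert creds.raw_data_root == "/local/storage/sun_data"
    assert creds.user_data_root == "/local/storage/abc123"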
@@ -105,6 +139,9 @@ class Server:
         # Loads the credentials from the provided .yaml file
         self._credentials: ServerCredentials = ServerCredentials.from_yaml(credentials_path)  # type: ignore

+        # Initializes a timer class to optionally delay loop cycling below
+        timer = PrecisionTimer("s")
+
         # Establishes the SSH connection to the specified processing server. At most, attempts to connect to the server
         # 30 times before terminating with an error
         attempt = 0
@@ -135,17 +172,140 @@ class Server:
                     raise RuntimeError

                 console.echo(
-                    f"Could not SSH
+                    f"Could not SSH into {self._credentials.host}, retrying after a 2-second delay...",
                     level=LogLevel.WARNING,
                 )
                 attempt += 1
-
+                timer.delay_noblock(delay=2, allow_sleep=True)

     def __del__(self) -> None:
         """If the instance is connected to the server, terminates the connection before the instance is destroyed."""
         self.close()

-    def
+    def create_job(
+        self,
+        job_name: str,
+        conda_environment: str,
+        cpus_to_use: int = 10,
+        ram_gb: int = 10,
+        time_limit: int = 60,
+    ) -> Job:
+        """Creates and returns a new Job instance.
+
+        Use this method to generate Job objects for all headless jobs that need to be run on the remote server. The
+        generated Job is a precursor that requires further configuration by the user before it can be submitted to the
+        server for execution.
+
+        Args:
+            job_name: The descriptive name of the SLURM job to be created. Primarily, this name is used in terminal
+                printouts to identify the job to human operators.
+            conda_environment: The name of the conda environment to activate on the server before running the job logic.
+                The environment should contain the necessary Python packages and CLIs to support running the job's
+                logic.
+            cpus_to_use: The number of CPUs to use for the job.
+            ram_gb: The amount of RAM to allocate for the job, in Gigabytes.
+            time_limit: The maximum time limit for the job, in minutes. If the job is still running at the end of this
+                time period, it will be forcibly terminated. It is highly advised to always set adequate maximum runtime
+                limits to prevent jobs from hogging the server in case of runtime or algorithm errors.
+
+        Returns:
+            The initialized Job instance pre-filled with SLURM configuration data and conda activation commands. Modify
+            the returned instance with any additional commands as necessary for the job to fulfill its intended
+            purpose. Note, the Job requires submission via submit_job() to be executed by the server.
+        """
+        # Statically configures the working directory to be stored under:
+        # user working root / job_logs / job_name_timestamp
+        timestamp = get_timestamp()
+        working_directory = Path(self.user_working_root.joinpath("job_logs", f"{job_name}_{timestamp}"))
+        self.create_directory(remote_path=working_directory, parents=True)
+
+        return Job(
+            job_name=job_name,
+            output_log=working_directory.joinpath("stdout.txt"),
+            error_log=working_directory.joinpath("stderr.txt"),
+            working_directory=working_directory,
+            conda_environment=conda_environment,
+            cpus_to_use=cpus_to_use,
+            ram_gb=ram_gb,
+            time_limit=time_limit,
+        )
+
+    def launch_jupyter_server(
+        self,
+        job_name: str,
+        conda_environment: str,
+        notebook_directory: Path,
+        cpus_to_use: int = 2,
+        ram_gb: int = 32,
+        time_limit: int = 240,
+        port: int = 0,
+        jupyter_args: str = "",
+    ) -> JupyterJob:
+        """Launches a Jupyter notebook server on the target remote Sun lab server.
+
+        Use this method to run interactive Jupyter sessions on the remote server under SLURM control. Unlike
+        create_job(), this method automatically submits the job for execution as part of its runtime. Therefore, the
+        returned JupyterJob instance should only be used to query information about how to connect to the remote
+        Jupyter server.
+
+        Args:
+            job_name: The descriptive name of the Jupyter SLURM job to be created. Primarily, this name is used in
+                terminal printouts to identify the job to human operators.
+            conda_environment: The name of the conda environment to activate on the server before running the job logic.
+                The environment should contain the necessary Python packages and CLIs to support running the job's
+                logic. For Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
+            port: The connection port number for the Jupyter server. If set to 0 (default), a random port number between
+                8888 and 9999 will be assigned to this connection to reduce the possibility of colliding with other
+                user sessions.
+            notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have GUI
+                access to items stored in or under this directory. For most runtimes, this should be set to the user's
+                root data or working directory.
+            cpus_to_use: The number of CPUs to allocate to the Jupyter server. Keep this value as small as possible to
+                avoid interfering with headless data processing jobs.
+            ram_gb: The amount of RAM, in GB, to allocate to the Jupyter server. Keep this value as small as possible to
+                avoid interfering with headless data processing jobs.
+            time_limit: The maximum Jupyter server uptime, in minutes. Set this to the expected duration of your Jupyter
+                session.
+            jupyter_args: Stores additional arguments to pass to the jupyter notebook initialization command.
+
+        Returns:
+            The initialized JupyterJob instance that stores information on how to connect to the created Jupyter server.
+            Do NOT re-submit the job to the server, as this is done as part of this method's runtime.
+
+        Raises:
+            TimeoutError: If the target Jupyter server doesn't start within 120 seconds from this method being called.
+            RuntimeError: If job submission fails for any reason.
+        """
+
+        # Statically configures the working directory to be stored under:
+        # user working root / job_logs / job_name_timestamp
+        timestamp = get_timestamp()
+        working_directory = Path(self.user_working_root.joinpath("job_logs", f"{job_name}_{timestamp}"))
+        self.create_directory(remote_path=working_directory, parents=True)
+
+        # If necessary, generates and sets port to a random value between 8888 and 9999.
+        if port == 0:
+            port = randint(8888, 9999)
+
+        job = JupyterJob(
+            job_name=job_name,
+            output_log=working_directory.joinpath("stdout.txt"),
+            error_log=working_directory.joinpath("stderr.txt"),
+            working_directory=working_directory,
+            conda_environment=conda_environment,
+            notebook_directory=notebook_directory,
+            port=port,
+            cpus_to_use=cpus_to_use,
+            ram_gb=ram_gb,
+            time_limit=time_limit,
+            jupyter_args=jupyter_args,
+        )
+
+        # Submits the job to the server and, if submission is successful, returns the JupyterJob object extended to
+        # include connection data received from the server.
+        return self.submit_job(job)  # type: ignore[return-value]
+
+    def submit_job(self, job: Job | JupyterJob) -> Job | JupyterJob:
         """Submits the input job to the managed BioHPC server via SLURM job manager.

         This method submits various jobs for execution via SLURM-managed BioHPC cluster. As part of its runtime, the
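Taken together, the two new methods split job handling into a build step and a submit step. A hypothetical usage sketch; the Server constructor signature, import path, and environment names are assumptions based on this hunk, not confirmed API:

    from pathlib import Path

    from sl_shared_assets.server import Server  # assumed import path

    server = Server(credentials_path=Path("server_credentials.yaml"))  # assumed signature

    # Headless job: create_job() returns a precursor that still needs its
    # shell commands added before submission.
    job = server.create_job(job_name="suite2p_run", conda_environment="processing")
    # ... extend 'job' with the commands it should run (the Job command API is not shown in this hunk) ...
    job = server.submit_job(job)

    # Interactive job: launch_jupyter_server() submits internally; only read
    # connection details from the returned JupyterJob, never re-submit it.
    jupyter = server.launch_jupyter_server(
        job_name="analysis_notebook",
        conda_environment="analysis",
        notebook_directory=server.user_data_root,
    )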
@@ -162,6 +322,7 @@ class Server:
         Raises:
             RuntimeError: If job submission to the server fails.
         """
+        console.echo(message=f"Submitting '{job.job_name}' job to the remote server {self.host}...")

         # Generates a temporary shell script on the local machine. Uses tempfile to automatically remove the
         # local script as soon as it is uploaded to the server.
@@ -197,9 +358,62 @@ class Server:
         # Job object
         job_id = job_output.split()[-1]
         job.job_id = job_id
+
+        # Special processing for Jupyter jobs
+        if isinstance(job, JupyterJob):
+            # Transfers host and user information to the JupyterJob object
+            job.host = self.host
+            job.user = self.user
+
+            # Initializes a timer class to optionally delay loop cycling below
+            timer = PrecisionTimer("s")
+
+            timer.reset()
+            while timer.elapsed < 120:  # Waits for at most 2 minutes before terminating with an error
+                # Checks if the connection info file exists
+                try:
+                    # Pulls the connection info file
+                    local_info_file = Path(f"/tmp/{job.job_name}_connection.txt")
+                    self.pull_file(local_file_path=local_info_file, remote_file_path=job.connection_info_file)
+
+                    # Parses connection data from the file and caches it inside Job class attributes
+                    job.parse_connection_info(local_info_file)
+
+                    # Removes the local file copy after it is parsed
+                    local_info_file.unlink(missing_ok=True)
+
+                    # Also removes the remote copy once the runtime is over
+                    self.remove(remote_path=job.connection_info_file, is_dir=False)
+
+                    # Breaks the waiting loop
+                    break
+
+                except Exception:
+                    # The file doesn't exist yet or job initialization failed
+                    if self.job_complete(job):
+                        message = (
+                            f"Remote jupyter server job {job.job_name} with id {job.job_id} encountered a startup "
+                            f"error and was terminated prematurely."
+                        )
+                        console.error(message, RuntimeError)
+
+                    timer.delay_noblock(delay=5, allow_sleep=True)  # Waits for 5 seconds before checking again
+            else:
+                # Only raises the timeout error if the while loop is not broken in 120 seconds
+                message = (
+                    f"Remote jupyter server job {job.job_name} with id {job.job_id} did not start within 120 seconds "
+                    f"of being submitted. Since all jupyter jobs are intended to be interactive and the server is "
+                    f"busy running other jobs, this job is cancelled. Try again when the server is less busy."
+                )
+                console.error(message, TimeoutError)
+                raise TimeoutError(message)  # Fallback to appease mypy
+
+        console.echo(message=f"{job.job_name} job: Submitted to {self.host}.", level=LogLevel.SUCCESS)
+
+        # Returns the updated job object
         return job

-    def job_complete(self, job: Job) -> bool:
+    def job_complete(self, job: Job | JupyterJob) -> bool:
         """Returns True if the job managed by the input Job instance has been completed or terminated its runtime due
         to an error.

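The Jupyter branch above is a poll-with-timeout loop: retry pulling the connection file every five seconds, bail out early if the job already died, and time out after 120 seconds via the while/else clause. The same control flow in a dependency-free form, with time.monotonic standing in for PrecisionTimer; a sketch, not the package code:

    import time

    def wait_for(condition, timeout_s: float = 120.0, interval_s: float = 5.0) -> None:
        """Polls 'condition' until it returns True or 'timeout_s' elapses."""
        deadline = time.monotonic() + timeout_s
        while time.monotonic() < deadline:
            if condition():  # e.g. "the connection info file was pulled and parsed"
                return
            time.sleep(interval_s)
        raise TimeoutError(f"Condition not met within {timeout_s} seconds.")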
@@ -228,6 +442,24 @@ class Server:
         else:
             return False

+    def abort_job(self, job: Job | JupyterJob) -> None:
+        """Aborts the target job if it is currently running on the server.
+
+        Use this method to immediately abort running or queued jobs, without waiting for the timeout guard. If the job
+        is queued, this method will remove it from the SLURM queue. If the job is already terminated, this method will
+        do nothing.
+
+        Args:
+            job: The Job object that needs to be aborted.
+        """
+
+        # Sends the 'scancel' command to the server targeting the specific Job via ID, unless the job is already
+        # complete
+        if not self.job_complete(job):
+            self._client.exec_command(f"scancel {job.job_id}")
+
+        console.echo(message=f"{job.job_name} job: Aborted.", level=LogLevel.SUCCESS)
+
     def pull_file(self, local_file_path: Path, remote_file_path: Path) -> None:
         """Moves the specified file from the remote server to the local machine.

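abort_job() pairs naturally with job_complete() for client-side watchdogs. A hypothetical guard, reusing the 'server' and 'job' objects from the earlier sketch:

    import time

    # Cancel the job if it outlives a local 30-minute budget, independently
    # of the SLURM-side time_limit.
    deadline = time.monotonic() + 30 * 60
    while not server.job_complete(job):
        if time.monotonic() > deadline:
            server.abort_job(job)
            break
        time.sleep(10)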
@@ -236,8 +468,10 @@ class Server:
             remote_file_path: The path to the target file on the remote server (the file to be copied).
         """
         sftp = self._client.open_sftp()
-
-
+        try:
+            sftp.get(localpath=local_file_path, remotepath=str(remote_file_path))
+        finally:
+            sftp.close()

     def push_file(self, local_file_path: Path, remote_file_path: Path) -> None:
         """Moves the specified file from the local machine to the remote server.
@@ -247,8 +481,10 @@ class Server:
             remote_file_path: The path to the file on the remote server (where to copy the file).
         """
         sftp = self._client.open_sftp()
-
-
+        try:
+            sftp.put(localpath=local_file_path, remotepath=str(remote_file_path))
+        finally:
+            sftp.close()

     def remove(self, remote_path: Path, is_dir: bool) -> None:
         """Removes the specified file or directory from the remote server.
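pull_file() and push_file() above, and remove() in the next hunk, all repeat the same open_sftp()/try/finally shape. That shape could equally be written once as a context manager; a sketch of the alternative, not part of the package. It would also cover exists() below, which as written returns without closing its SFTP channel:

    from contextlib import contextmanager

    @contextmanager
    def open_sftp(client):
        """Yields an SFTP session on 'client' and always closes it afterwards."""
        sftp = client.open_sftp()
        try:
            yield sftp
        finally:
            sftp.close()

    # Hypothetical usage inside a method:
    #     with open_sftp(self._client) as sftp:
    #         sftp.get(localpath=str(local_file_path), remotepath=str(remote_file_path))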
@@ -258,11 +494,87 @@ class Server:
             is_dir: Determines whether the input path represents a directory or a file.
         """
         sftp = self._client.open_sftp()
-
-
-
-
-
+        try:
+            if is_dir:
+                sftp.rmdir(path=str(remote_path))
+            else:
+                sftp.unlink(path=str(remote_path))
+        finally:
+            sftp.close()
+
+    def create_directory(self, remote_path: Path, parents: bool = True) -> None:
+        """Creates the specified directory tree on the managed remote server via SFTP.
+
+        This method creates directories on the remote server, with options to create parent directories and handle
+        existing directories gracefully.
+
+        Args:
+            remote_path: The absolute path to the directory to create on the remote server, relative to the server
+                root.
+            parents: Determines whether to create parent directories, if they are missing. Otherwise, if parents do not
+                exist, raises a FileNotFoundError.
+
+        Notes:
+            This method silently assumes that it is fine if the directory already exists and treats it as a successful
+            runtime end-point.
+        """
+        sftp = self._client.open_sftp()
+
+        try:
+            # Converts the target path to string for SFTP operations
+            remote_path_str = str(remote_path)
+
+            if parents:
+                # Creates parent directories if needed:
+                # Split the path into parts and create each level
+                path_parts = Path(remote_path_str).parts
+                current_path = ""
+
+                for part in path_parts:
+                    # Skips empty path parts
+                    if not part:
+                        continue
+
+                    if current_path:
+                        # Keeps stacking path components on top of the current_path object
+                        current_path = str(Path(current_path).joinpath(part))
+                    else:
+                        # Initially, the current path is empty, so it is set to the first part
+                        current_path = part
+
+                    try:
+                        # Checks if the directory exists by trying to stat it
+                        sftp.stat(current_path)
+                    except FileNotFoundError:
+                        # If the directory does not exist, creates it
+                        sftp.mkdir(current_path)
+            else:
+                # Otherwise, only creates the final directory
+                try:
+                    # Checks if the directory already exists
+                    sftp.stat(remote_path_str)
+                except FileNotFoundError:
+                    # Creates the directory if it does not exist
+                    sftp.mkdir(remote_path_str)
+
+        # Ensures the sftp connection is closed.
+        finally:
+            sftp.close()
+
+    def exists(self, remote_path: Path) -> bool:
+        """Returns True if the target file or directory exists on the remote server."""
+
+        sftp = self._client.open_sftp()
+        try:
+            # Checks if the target file or directory exists by trying to stat it
+            sftp.stat(str(remote_path))

+            # If the request does not err, returns True (file or directory exists)
+            return True
+
+        # If the directory or file does not exist, returns False
+        except FileNotFoundError:
+            return False

     def close(self) -> None:
         """Closes the SSH connection to the server.
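create_directory() implements an SFTP-side 'mkdir -p': it walks every prefix of the path, stats it, and creates the levels that are missing (paramiko surfaces a missing remote path as FileNotFoundError, which is what the code above catches). The core walk, condensed into a standalone helper; a sketch assuming an absolute POSIX path and an already-open paramiko SFTPClient:

    from pathlib import PurePosixPath

    def sftp_makedirs(sftp, remote_path: str) -> None:
        """Creates 'remote_path' and any missing parents over an open SFTP session."""
        parts = PurePosixPath(remote_path).parts
        current = PurePosixPath(parts[0])  # '/' for absolute paths
        for part in parts[1:]:
            current = current / part
            try:
                sftp.stat(str(current))   # this level already exists
            except FileNotFoundError:
                sftp.mkdir(str(current))  # create the missing level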
@@ -274,15 +586,37 @@ class Server:
         self._client.close()

     @property
-    def raw_data_root(self) ->
+    def raw_data_root(self) -> Path:
         """Returns the absolute path to the directory used to store the raw data for all Sun lab projects on the server
         accessible through this class.
         """
-        return self._credentials.raw_data_root
+        return Path(self._credentials.raw_data_root)

     @property
-    def processed_data_root(self) ->
+    def processed_data_root(self) -> Path:
         """Returns the absolute path to the directory used to store the processed data for all Sun lab projects on the
         server accessible through this class.
         """
-        return self._credentials.processed_data_root
+        return Path(self._credentials.processed_data_root)
+
+    @property
+    def user_data_root(self) -> Path:
+        """Returns the absolute path to the directory used to store user-specific data on the server accessible through
+        this class."""
+        return Path(self._credentials.user_data_root)
+
+    @property
+    def user_working_root(self) -> Path:
+        """Returns the absolute path to the user-specific working (fast) directory on the server accessible through
+        this class."""
+        return Path(self._credentials.user_working_root)
+
+    @property
+    def host(self) -> str:
+        """Returns the hostname or IP address of the server accessible through this class."""
+        return self._credentials.host
+
+    @property
+    def user(self) -> str:
+        """Returns the username used to authenticate with the server."""
+        return self._credentials.username
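Because the root properties now return pathlib.Path objects, and host/user expose the connection identity, remote paths compose with the '/' operator. A closing sketch with placeholder project names, reusing 'server' from the earlier example:

    from pathlib import Path

    session_root = server.raw_data_root / "example_project" / "animal_01"  # placeholder names
    if server.exists(session_root):
        server.pull_file(
            local_file_path=Path("session_data.yaml"),
            remote_file_path=session_root / "session_data.yaml",
        )
    print(f"Connected to {server.host} as {server.user}.")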