sl-shared-assets 4.0.0__py3-none-any.whl → 5.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sl-shared-assets might be problematic. Click here for more details.
- sl_shared_assets/__init__.py +45 -42
- sl_shared_assets/command_line_interfaces/__init__.py +3 -0
- sl_shared_assets/command_line_interfaces/configure.py +173 -0
- sl_shared_assets/command_line_interfaces/manage.py +226 -0
- sl_shared_assets/data_classes/__init__.py +33 -32
- sl_shared_assets/data_classes/configuration_data.py +267 -79
- sl_shared_assets/data_classes/runtime_data.py +11 -11
- sl_shared_assets/data_classes/session_data.py +226 -289
- sl_shared_assets/data_classes/surgery_data.py +6 -6
- sl_shared_assets/server/__init__.py +24 -4
- sl_shared_assets/server/job.py +6 -7
- sl_shared_assets/server/pipeline.py +570 -0
- sl_shared_assets/server/server.py +57 -25
- sl_shared_assets/tools/__init__.py +9 -8
- sl_shared_assets/tools/packaging_tools.py +14 -25
- sl_shared_assets/tools/project_management_tools.py +602 -523
- sl_shared_assets/tools/transfer_tools.py +88 -23
- {sl_shared_assets-4.0.0.dist-info → sl_shared_assets-5.0.0.dist-info}/METADATA +46 -202
- sl_shared_assets-5.0.0.dist-info/RECORD +23 -0
- sl_shared_assets-5.0.0.dist-info/entry_points.txt +3 -0
- sl_shared_assets/__init__.pyi +0 -91
- sl_shared_assets/cli.py +0 -500
- sl_shared_assets/cli.pyi +0 -106
- sl_shared_assets/data_classes/__init__.pyi +0 -75
- sl_shared_assets/data_classes/configuration_data.pyi +0 -235
- sl_shared_assets/data_classes/runtime_data.pyi +0 -157
- sl_shared_assets/data_classes/session_data.pyi +0 -379
- sl_shared_assets/data_classes/surgery_data.pyi +0 -89
- sl_shared_assets/server/__init__.pyi +0 -11
- sl_shared_assets/server/job.pyi +0 -205
- sl_shared_assets/server/server.pyi +0 -298
- sl_shared_assets/tools/__init__.pyi +0 -19
- sl_shared_assets/tools/ascension_tools.py +0 -265
- sl_shared_assets/tools/ascension_tools.pyi +0 -68
- sl_shared_assets/tools/packaging_tools.pyi +0 -58
- sl_shared_assets/tools/project_management_tools.pyi +0 -239
- sl_shared_assets/tools/transfer_tools.pyi +0 -53
- sl_shared_assets-4.0.0.dist-info/RECORD +0 -36
- sl_shared_assets-4.0.0.dist-info/entry_points.txt +0 -7
- {sl_shared_assets-4.0.0.dist-info → sl_shared_assets-5.0.0.dist-info}/WHEEL +0 -0
- {sl_shared_assets-4.0.0.dist-info → sl_shared_assets-5.0.0.dist-info}/licenses/LICENSE +0 -0
sl_shared_assets/server/job.pyi
DELETED
|
@@ -1,205 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
from _typeshed import Incomplete
|
|
5
|
-
from simple_slurm import Slurm
|
|
6
|
-
|
|
7
|
-
@dataclass
|
|
8
|
-
class _JupyterConnectionInfo:
|
|
9
|
-
"""Stores the data used to establish the connection with a Jupyter notebook server running under SLURM control on a
|
|
10
|
-
remote Sun lab server.
|
|
11
|
-
|
|
12
|
-
More specifically, this class is used to transfer the connection metadata collected on the remote server back to
|
|
13
|
-
the local machine that requested the server to be established.
|
|
14
|
-
"""
|
|
15
|
-
|
|
16
|
-
compute_node: str
|
|
17
|
-
port: int
|
|
18
|
-
token: str
|
|
19
|
-
@property
|
|
20
|
-
def localhost_url(self) -> str:
|
|
21
|
-
"""Returns the localhost URL for connecting to the server.
|
|
22
|
-
|
|
23
|
-
To use this URL, first set up an SSH tunnel to the server via the specific Jupyter communication port and the
|
|
24
|
-
remote server access credentials.
|
|
25
|
-
"""
|
|
26
|
-
|
|
27
|
-
class Job:
|
|
28
|
-
"""Aggregates the data of a single SLURM-managed job to be executed on the Sun lab BioHPC cluster.
|
|
29
|
-
|
|
30
|
-
This class provides the API for constructing any server-side job in the Sun lab. Internally, it wraps an instance
|
|
31
|
-
of a Slurm class to package the job data into the format expected by the SLURM job manager. All jobs managed by this
|
|
32
|
-
class instance should be submitted to an initialized Server class 'submit_job' method to be executed on the server.
|
|
33
|
-
|
|
34
|
-
Notes:
|
|
35
|
-
The initialization method of the class contains the arguments for configuring the SLURM and Conda environments
|
|
36
|
-
used by the job. Do not submit additional SLURM or Conda commands via the 'add_command' method, as this may
|
|
37
|
-
produce unexpected behavior.
|
|
38
|
-
|
|
39
|
-
Each job can be conceptualized as a sequence of shell instructions to execute on the remote compute server. For
|
|
40
|
-
the lab, that means that the bulk of the command consists of calling various CLIs exposed by data processing or
|
|
41
|
-
analysis pipelines, installed in the Conda environment on the server. Other than that, the job contains commands
|
|
42
|
-
for activating the target conda environment and, in some cases, doing other preparatory or cleanup work. The
|
|
43
|
-
source code of a 'remote' job is typically identical to what a human operator would type in a 'local' terminal
|
|
44
|
-
to run the same job on their PC.
|
|
45
|
-
|
|
46
|
-
A key feature of server-side jobs is that they are executed on virtual machines managed by SLURM. Since the
|
|
47
|
-
server has a lot more compute and memory resources than likely needed by individual jobs, each job typically
|
|
48
|
-
requests a subset of these resources. Upon being executed, SLURM creates an isolated environment with the
|
|
49
|
-
requested resources and runs the job in that environment.
|
|
50
|
-
|
|
51
|
-
Since all jobs are expected to use the CLIs from python packages (pre)installed on the BioHPC server, make sure
|
|
52
|
-
that the target environment is installed and configured before submitting jobs to the server. See notes in
|
|
53
|
-
ReadMe to learn more about configuring server-side conda environments.
|
|
54
|
-
|
|
55
|
-
Args:
|
|
56
|
-
job_name: The descriptive name of the SLURM job to be created. Primarily, this name is used in terminal
|
|
57
|
-
printouts to identify the job to human operators.
|
|
58
|
-
output_log: The absolute path to the .txt file on the processing server, where to store the standard output
|
|
59
|
-
data of the job.
|
|
60
|
-
error_log: The absolute path to the .txt file on the processing server, where to store the standard error
|
|
61
|
-
data of the job.
|
|
62
|
-
working_directory: The absolute path to the directory where temporary job files will be stored. During runtime,
|
|
63
|
-
classes from this library use that directory to store files such as the job's shell script. All such files
|
|
64
|
-
are automatically removed from the directory at the end of an error-free runtime.
|
|
65
|
-
conda_environment: The name of the conda environment to activate on the server before running the job logic. The
|
|
66
|
-
environment should contain the necessary Python packages and CLIs to support running the job's logic.
|
|
67
|
-
cpus_to_use: The number of CPUs to use for the job.
|
|
68
|
-
ram_gb: The amount of RAM to allocate for the job, in Gigabytes.
|
|
69
|
-
time_limit: The maximum time limit for the job, in minutes. If the job is still running at the end of this time
|
|
70
|
-
period, it will be forcibly terminated. It is highly advised to always set adequate maximum runtime limits
|
|
71
|
-
to prevent jobs from hogging the server in case of runtime or algorithm errors.
|
|
72
|
-
|
|
73
|
-
Attributes:
|
|
74
|
-
remote_script_path: Stores the path to the script file relative to the root of the remote server that runs the
|
|
75
|
-
command.
|
|
76
|
-
job_id: Stores the unique job identifier assigned by the SLURM manager to this job when it is accepted for
|
|
77
|
-
execution. This field is initialized to None and is overwritten by the Server class that submits the job.
|
|
78
|
-
job_name: Stores the descriptive name of the SLURM job.
|
|
79
|
-
_command: Stores the managed SLURM command object.
|
|
80
|
-
"""
|
|
81
|
-
|
|
82
|
-
remote_script_path: Incomplete
|
|
83
|
-
job_id: str | None
|
|
84
|
-
job_name: str
|
|
85
|
-
_command: Slurm
|
|
86
|
-
def __init__(
|
|
87
|
-
self,
|
|
88
|
-
job_name: str,
|
|
89
|
-
output_log: Path,
|
|
90
|
-
error_log: Path,
|
|
91
|
-
working_directory: Path,
|
|
92
|
-
conda_environment: str,
|
|
93
|
-
cpus_to_use: int = 10,
|
|
94
|
-
ram_gb: int = 10,
|
|
95
|
-
time_limit: int = 60,
|
|
96
|
-
) -> None: ...
|
|
97
|
-
def __repr__(self) -> str:
|
|
98
|
-
"""Returns the string representation of the Job instance."""
|
|
99
|
-
def add_command(self, command: str) -> None:
|
|
100
|
-
"""Adds the input command string to the end of the managed SLURM job command list.
|
|
101
|
-
|
|
102
|
-
This method is a wrapper around simple_slurm's 'add_cmd' method. It is used to iteratively build the shell
|
|
103
|
-
command sequence of the job.
|
|
104
|
-
|
|
105
|
-
Args:
|
|
106
|
-
command: The command string to add to the command list, e.g.: 'python main.py --input 1'.
|
|
107
|
-
"""
|
|
108
|
-
@property
|
|
109
|
-
def command_script(self) -> str:
|
|
110
|
-
"""Translates the managed job data into a shell-script-writable string and returns it to caller.
|
|
111
|
-
|
|
112
|
-
This method is used by the Server class to translate the job into the format that can be submitted to and
|
|
113
|
-
executed on the remote compute server. Do not call this method manually unless you know what you are doing.
|
|
114
|
-
The returned string is safe to dump into a .sh (shell script) file and move to the BioHPC server for execution.
|
|
115
|
-
"""
|
|
116
|
-
|
|
117
|
-
class JupyterJob(Job):
|
|
118
|
-
"""Specialized Job instance designed to launch a Jupyter notebook server on SLURM.
|
|
119
|
-
|
|
120
|
-
This class extends the base Job class to include Jupyter-specific configuration and commands for starting a
|
|
121
|
-
notebook server in a SLURM environment. Using this specialized job allows users to set up remote Jupyter servers
|
|
122
|
-
while still benefitting from SLURM's job management and fair airtime policies.
|
|
123
|
-
|
|
124
|
-
Notes:
|
|
125
|
-
Jupyter servers directly compete for resources with headless data processing jobs. Therefore, it is important
|
|
126
|
-
to minimize the resource footprint and the runtime of each Jupyter server, if possible.
|
|
127
|
-
|
|
128
|
-
Args:
|
|
129
|
-
job_name: The descriptive name of the Jupyter SLURM job to be created. Primarily, this name is used in terminal
|
|
130
|
-
printouts to identify the job to human operators.
|
|
131
|
-
output_log: The absolute path to the .txt file on the processing server, where to store the standard output
|
|
132
|
-
data of the job.
|
|
133
|
-
error_log: The absolute path to the .txt file on the processing server, where to store the standard error
|
|
134
|
-
data of the job.
|
|
135
|
-
working_directory: The absolute path to the directory where temporary job files will be stored. During runtime,
|
|
136
|
-
classes from this library use that directory to store files such as the job's shell script. All such files
|
|
137
|
-
are automatically removed from the directory at the end of an error-free runtime.
|
|
138
|
-
conda_environment: The name of the conda environment to activate on the server before running the job logic. The
|
|
139
|
-
environment should contain the necessary Python packages and CLIs to support running the job's logic. For
|
|
140
|
-
Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
|
|
141
|
-
port: The connection port number for the Jupyter server. Do not change the default value unless you know what
|
|
142
|
-
you are doing, as the server has most common communication ports closed for security reasons.
|
|
143
|
-
notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have access to
|
|
144
|
-
items stored in or under this directory. For most runtimes, this should be set to the user's root data or
|
|
145
|
-
working directory.
|
|
146
|
-
cpus_to_use: The number of CPUs to allocate to the Jupyter server. Keep this value as small as possible to avoid
|
|
147
|
-
interfering with headless data processing jobs.
|
|
148
|
-
ram_gb: The amount of RAM, in GB, to allocate to the Jupyter server. Keep this value as small as possible to
|
|
149
|
-
avoid interfering with headless data processing jobs.
|
|
150
|
-
time_limit: The maximum Jupyter server uptime, in minutes. Set this to the expected duration of your jupyter
|
|
151
|
-
session.
|
|
152
|
-
jupyter_args: Stores additional arguments to pass to the Jupyter notebook initialization command.
|
|
153
|
-
|
|
154
|
-
Attributes:
|
|
155
|
-
port: Stores the connection port of the managed Jupyter server.
|
|
156
|
-
notebook_dir: Stores the absolute path to the directory used as Jupyter's root, relative to the remote server
|
|
157
|
-
root.
|
|
158
|
-
connection_info: Stores the JupyterConnectionInfo instance after the Jupyter server is instantiated.
|
|
159
|
-
host: Stores the hostname of the remote server.
|
|
160
|
-
user: Stores the username used to connect with the remote server.
|
|
161
|
-
connection_info_file: The absolute path to the file that stores connection information, relative to the remote
|
|
162
|
-
server root.
|
|
163
|
-
_command: Stores the shell command for launching the Jupyter server.
|
|
164
|
-
"""
|
|
165
|
-
|
|
166
|
-
port: Incomplete
|
|
167
|
-
notebook_dir: Incomplete
|
|
168
|
-
connection_info: _JupyterConnectionInfo | None
|
|
169
|
-
host: str | None
|
|
170
|
-
user: str | None
|
|
171
|
-
connection_info_file: Incomplete
|
|
172
|
-
def __init__(
|
|
173
|
-
self,
|
|
174
|
-
job_name: str,
|
|
175
|
-
output_log: Path,
|
|
176
|
-
error_log: Path,
|
|
177
|
-
working_directory: Path,
|
|
178
|
-
conda_environment: str,
|
|
179
|
-
notebook_directory: Path,
|
|
180
|
-
port: int = 9999,
|
|
181
|
-
cpus_to_use: int = 2,
|
|
182
|
-
ram_gb: int = 32,
|
|
183
|
-
time_limit: int = 120,
|
|
184
|
-
jupyter_args: str = "",
|
|
185
|
-
) -> None: ...
|
|
186
|
-
def _build_jupyter_command(self, jupyter_args: str) -> None:
|
|
187
|
-
"""Builds the command to launch the Jupyter notebook server on the remote Sun lab server."""
|
|
188
|
-
def parse_connection_info(self, info_file: Path) -> None:
|
|
189
|
-
"""Parses the connection information file created by the Jupyter job on the server.
|
|
190
|
-
|
|
191
|
-
Use this method to parse the connection file fetched from the server to finalize setting up the Jupyter
|
|
192
|
-
server job.
|
|
193
|
-
|
|
194
|
-
Args:
|
|
195
|
-
info_file: The path to the .txt file generated by the remote server that stores the Jupyter connection
|
|
196
|
-
information to be parsed.
|
|
197
|
-
"""
|
|
198
|
-
def print_connection_info(self) -> None:
|
|
199
|
-
"""Constructs and displays the command to set up the SSH tunnel to the server and the link to the localhost
|
|
200
|
-
server view in the terminal.
|
|
201
|
-
|
|
202
|
-
The SSH command should be used via a separate terminal or subprocess call to establish the secure SSH tunnel to
|
|
203
|
-
the Jupyter server. Once the SSH tunnel is established, the printed localhost url can be used to view the
|
|
204
|
-
server from the local machine.
|
|
205
|
-
"""
|
|
@@ -1,298 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
from dataclasses import field, dataclass
|
|
3
|
-
|
|
4
|
-
import paramiko
|
|
5
|
-
from _typeshed import Incomplete
|
|
6
|
-
from simple_slurm import Slurm as Slurm
|
|
7
|
-
from paramiko.client import SSHClient as SSHClient
|
|
8
|
-
from ataraxis_data_structures import YamlConfig
|
|
9
|
-
|
|
10
|
-
from .job import (
|
|
11
|
-
Job as Job,
|
|
12
|
-
JupyterJob as JupyterJob,
|
|
13
|
-
)
|
|
14
|
-
|
|
15
|
-
def generate_server_credentials(
|
|
16
|
-
output_directory: Path,
|
|
17
|
-
username: str,
|
|
18
|
-
password: str,
|
|
19
|
-
host: str = "cbsuwsun.biopic.cornell.edu",
|
|
20
|
-
storage_root: str = "/local/workdir",
|
|
21
|
-
working_root: str = "/local/storage",
|
|
22
|
-
shared_directory_name: str = "sun_data",
|
|
23
|
-
) -> None:
|
|
24
|
-
"""Generates a new server_credentials.yaml file under the specified directory, using input information.
|
|
25
|
-
|
|
26
|
-
This function provides a convenience interface for generating new BioHPC server credential files. Generally, this is
|
|
27
|
-
only used when setting up new host-computers or users in the lab.
|
|
28
|
-
|
|
29
|
-
Args:
|
|
30
|
-
output_directory: The directory where to save the generated server_credentials.yaml file.
|
|
31
|
-
username: The username to use for server authentication.
|
|
32
|
-
password: The password to use for server authentication.
|
|
33
|
-
host: The hostname or IP address of the server to connect to.
|
|
34
|
-
storage_root: The path to the root storage (slow) server directory. Typically, this is the path to the
|
|
35
|
-
top-level (root) directory of the HDD RAID volume.
|
|
36
|
-
working_root: The path to the root working (fast) server directory. Typically, this is the path to the
|
|
37
|
-
top-level (root) directory of the NVME RAID volume. If the server uses the same volume for both storage and
|
|
38
|
-
working directories, enter the same path under both 'storage_root' and 'working_root'.
|
|
39
|
-
shared_directory_name: The name of the shared directory used to store all Sun lab project data on the storage
|
|
40
|
-
and working server volumes.
|
|
41
|
-
"""
|
|
42
|
-
@dataclass()
|
|
43
|
-
class ServerCredentials(YamlConfig):
|
|
44
|
-
"""This class stores the hostname and credentials used to log into the BioHPC cluster to run Sun lab processing
|
|
45
|
-
pipelines.
|
|
46
|
-
|
|
47
|
-
Primarily, this is used as part of the sl-experiment library runtime to start data processing once it is
|
|
48
|
-
transferred to the BioHPC server during preprocessing. However, the same file can be used together with the Server
|
|
49
|
-
class API to run any computation jobs on the lab's BioHPC server.
|
|
50
|
-
"""
|
|
51
|
-
|
|
52
|
-
username: str = ...
|
|
53
|
-
password: str = ...
|
|
54
|
-
host: str = ...
|
|
55
|
-
storage_root: str = ...
|
|
56
|
-
working_root: str = ...
|
|
57
|
-
shared_directory_name: str = ...
|
|
58
|
-
raw_data_root: str = field(init=False, default_factory=Incomplete)
|
|
59
|
-
processed_data_root: str = field(init=False, default_factory=Incomplete)
|
|
60
|
-
user_data_root: str = field(init=False, default_factory=Incomplete)
|
|
61
|
-
user_working_root: str = field(init=False, default_factory=Incomplete)
|
|
62
|
-
def __post_init__(self) -> None:
|
|
63
|
-
"""Statically resolves the paths to end-point directories using provided root directories."""
|
|
64
|
-
|
|
65
|
-
class Server:
|
|
66
|
-
"""Encapsulates access to the Sun lab BioHPC processing server.
|
|
67
|
-
|
|
68
|
-
This class provides the API that allows accessing the BioHPC server to create and submit various SLURM-managed jobs
|
|
69
|
-
to the server. It functions as the central interface used by all processing pipelines in the lab to execute costly
|
|
70
|
-
data processing on the server.
|
|
71
|
-
|
|
72
|
-
Notes:
|
|
73
|
-
All lab processing pipelines expect the data to be stored on the server and all processing logic to be packaged
|
|
74
|
-
and installed into dedicated conda environments on the server.
|
|
75
|
-
|
|
76
|
-
This class assumes that the target server has SLURM job manager installed and accessible to the user whose
|
|
77
|
-
credentials are used to connect to the server as part of this class instantiation.
|
|
78
|
-
|
|
79
|
-
Args:
|
|
80
|
-
credentials_path: The path to the locally stored .yaml file that contains the server hostname and access
|
|
81
|
-
credentials.
|
|
82
|
-
|
|
83
|
-
Attributes:
|
|
84
|
-
_open: Tracks whether the connection to the server is open or not.
|
|
85
|
-
_client: Stores the initialized SSHClient instance used to interface with the server.
|
|
86
|
-
"""
|
|
87
|
-
|
|
88
|
-
_open: bool
|
|
89
|
-
_credentials: ServerCredentials
|
|
90
|
-
_client: SSHClient
|
|
91
|
-
def __init__(self, credentials_path: Path) -> None: ...
|
|
92
|
-
def __del__(self) -> None:
|
|
93
|
-
"""If the instance is connected to the server, terminates the connection before the instance is destroyed."""
|
|
94
|
-
def create_job(
|
|
95
|
-
self, job_name: str, conda_environment: str, cpus_to_use: int = 10, ram_gb: int = 10, time_limit: int = 60
|
|
96
|
-
) -> Job:
|
|
97
|
-
"""Creates and returns a new Job instance.
|
|
98
|
-
|
|
99
|
-
Use this method to generate Job objects for all headless jobs that need to be run on the remote server. The
|
|
100
|
-
generated Job is a precursor that requires further configuration by the user before it can be submitted to the
|
|
101
|
-
server for execution.
|
|
102
|
-
|
|
103
|
-
Args:
|
|
104
|
-
job_name: The descriptive name of the SLURM job to be created. Primarily, this name is used in terminal
|
|
105
|
-
printouts to identify the job to human operators.
|
|
106
|
-
conda_environment: The name of the conda environment to activate on the server before running the job logic.
|
|
107
|
-
The environment should contain the necessary Python packages and CLIs to support running the job's
|
|
108
|
-
logic.
|
|
109
|
-
cpus_to_use: The number of CPUs to use for the job.
|
|
110
|
-
ram_gb: The amount of RAM to allocate for the job, in Gigabytes.
|
|
111
|
-
time_limit: The maximum time limit for the job, in minutes. If the job is still running at the end of this
|
|
112
|
-
time period, it will be forcibly terminated. It is highly advised to always set adequate maximum runtime
|
|
113
|
-
limits to prevent jobs from hogging the server in case of runtime or algorithm errors.
|
|
114
|
-
|
|
115
|
-
Returns:
|
|
116
|
-
The initialized Job instance pre-filled with SLURM configuration data and conda activation commands. Modify
|
|
117
|
-
the returned instance with any additional commands as necessary for the job to fulfill its intended
|
|
118
|
-
purpose. Note, the Job requires submission via submit_job() to be executed by the server.
|
|
119
|
-
"""
|
|
120
|
-
def launch_jupyter_server(
|
|
121
|
-
self,
|
|
122
|
-
job_name: str,
|
|
123
|
-
conda_environment: str,
|
|
124
|
-
notebook_directory: Path,
|
|
125
|
-
cpus_to_use: int = 2,
|
|
126
|
-
ram_gb: int = 32,
|
|
127
|
-
time_limit: int = 240,
|
|
128
|
-
port: int = 0,
|
|
129
|
-
jupyter_args: str = "",
|
|
130
|
-
) -> JupyterJob:
|
|
131
|
-
"""Launches a Jupyter notebook server on the target remote Sun lab server.
|
|
132
|
-
|
|
133
|
-
Use this method to run interactive Jupyter sessions on the remote server under SLURM control. Unlike
|
|
134
|
-
create_job(), this method automatically submits the job for execution as part of its runtime. Therefore, the
|
|
135
|
-
returned JupyterJob instance should only be used to query information about how to connect to the remote
|
|
136
|
-
Jupyter server.
|
|
137
|
-
|
|
138
|
-
Args:
|
|
139
|
-
job_name: The descriptive name of the Jupyter SLURM job to be created. Primarily, this name is used in
|
|
140
|
-
terminal printouts to identify the job to human operators.
|
|
141
|
-
conda_environment: The name of the conda environment to activate on the server before running the job logic.
|
|
142
|
-
The environment should contain the necessary Python packages and CLIs to support running the job's
|
|
143
|
-
logic. For Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
|
|
144
|
-
port: The connection port number for the Jupyter server. If set to 0 (default), a random port number between
|
|
145
|
-
8888 and 9999 will be assigned to this connection to reduce the possibility of colliding with other
|
|
146
|
-
user sessions.
|
|
147
|
-
notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have GUI
|
|
148
|
-
access to items stored in or under this directory. For most runtimes, this should be set to the user's
|
|
149
|
-
root data or working directory.
|
|
150
|
-
cpus_to_use: The number of CPUs to allocate to the Jupyter server. Keep this value as small as possible to
|
|
151
|
-
avoid interfering with headless data processing jobs.
|
|
152
|
-
ram_gb: The amount of RAM, in GB, to allocate to the Jupyter server. Keep this value as small as possible to
|
|
153
|
-
avoid interfering with headless data processing jobs.
|
|
154
|
-
time_limit: The maximum Jupyter server uptime, in minutes. Set this to the expected duration of your jupyter
|
|
155
|
-
session.
|
|
156
|
-
jupyter_args: Stores additional arguments to pass to the Jupyter notebook initialization command.
|
|
157
|
-
|
|
158
|
-
Returns:
|
|
159
|
-
The initialized JupyterJob instance that stores information on how to connect to the created Jupyter server.
|
|
160
|
-
Do NOT re-submit the job to the server, as this is done as part of this method's runtime.
|
|
161
|
-
|
|
162
|
-
Raises:
|
|
163
|
-
TimeoutError: If the target Jupyter server doesn't start within 120 minutes of this method being called.
|
|
164
|
-
RuntimeError: If the job submission fails for any reason.
|
|
165
|
-
"""
|
|
166
|
-
def submit_job(self, job: Job | JupyterJob) -> Job | JupyterJob:
|
|
167
|
-
"""Submits the input job to the managed BioHPC server via SLURM job manager.
|
|
168
|
-
|
|
169
|
-
This method submits various jobs for execution via the SLURM-managed BioHPC cluster. As part of its runtime, the
|
|
170
|
-
method translates the Job object into the shell script, moves the script to the target working directory on
|
|
171
|
-
the server, and instructs the server to execute the shell script (via SLURM).
|
|
172
|
-
|
|
173
|
-
Args:
|
|
174
|
-
job: The Job object that contains all job data.
|
|
175
|
-
|
|
176
|
-
Returns:
|
|
177
|
-
The job object whose 'job_id' attribute has been set to the SLURM-assigned job ID if the job was successfully
|
|
178
|
-
submitted.
|
|
179
|
-
|
|
180
|
-
Raises:
|
|
181
|
-
RuntimeError: If job submission to the server fails.
|
|
182
|
-
"""
|
|
183
|
-
def job_complete(self, job: Job | JupyterJob) -> bool:
|
|
184
|
-
"""Returns True if the job managed by the input Job instance has been completed or terminated its runtime due
|
|
185
|
-
to an error.
|
|
186
|
-
|
|
187
|
-
If the job is still running or is waiting inside the execution queue, the method returns False.
|
|
188
|
-
|
|
189
|
-
Args:
|
|
190
|
-
job: The Job object whose status needs to be checked.
|
|
191
|
-
|
|
192
|
-
Raises:
|
|
193
|
-
ValueError: If the input Job object does not contain a valid job_id, suggesting that it has not been
|
|
194
|
-
submitted to the server.
|
|
195
|
-
"""
|
|
196
|
-
def abort_job(self, job: Job | JupyterJob) -> None:
|
|
197
|
-
"""Aborts the target job if it is currently running on the server.
|
|
198
|
-
|
|
199
|
-
Use this method to immediately abort running or queued jobs without waiting for the timeout guard. If the job
|
|
200
|
-
is queued, this method will remove it from the SLURM queue. If the job is already terminated, this method will
|
|
201
|
-
do nothing.
|
|
202
|
-
|
|
203
|
-
Args:
|
|
204
|
-
job: The Job object that needs to be aborted.
|
|
205
|
-
"""
|
|
206
|
-
def pull_file(self, local_file_path: Path, remote_file_path: Path) -> None:
|
|
207
|
-
"""Moves the specified file from the remote server to the local machine.
|
|
208
|
-
|
|
209
|
-
Args:
|
|
210
|
-
local_file_path: The path to the local instance of the file (where to copy the file).
|
|
211
|
-
remote_file_path: The path to the target file on the remote server (the file to be copied).
|
|
212
|
-
"""
|
|
213
|
-
def push_file(self, local_file_path: Path, remote_file_path: Path) -> None:
|
|
214
|
-
"""Moves the specified file from the local machine to the remote server.
|
|
215
|
-
|
|
216
|
-
Args:
|
|
217
|
-
local_file_path: The path to the file that needs to be copied to the remote server.
|
|
218
|
-
remote_file_path: The path to the file on the remote server (where to copy the file).
|
|
219
|
-
"""
|
|
220
|
-
def pull_directory(self, local_directory_path: Path, remote_directory_path: Path) -> None:
|
|
221
|
-
"""Recursively downloads the entire target directory from the remote server to the local machine.
|
|
222
|
-
|
|
223
|
-
Args:
|
|
224
|
-
local_directory_path: The path to the local directory where the remote directory will be copied.
|
|
225
|
-
remote_directory_path: The path to the directory on the remote server to be downloaded.
|
|
226
|
-
"""
|
|
227
|
-
def push_directory(self, local_directory_path: Path, remote_directory_path: Path) -> None:
|
|
228
|
-
"""Recursively uploads the entire target directory from the local machine to the remote server.
|
|
229
|
-
|
|
230
|
-
Args:
|
|
231
|
-
local_directory_path: The path to the local directory to be uploaded.
|
|
232
|
-
remote_directory_path: The path on the remote server where the directory will be copied.
|
|
233
|
-
"""
|
|
234
|
-
def remove(self, remote_path: Path, is_dir: bool, recursive: bool = False) -> None:
|
|
235
|
-
"""Removes the specified file or directory from the remote server.
|
|
236
|
-
|
|
237
|
-
Args:
|
|
238
|
-
remote_path: The path to the file or directory on the remote server to be removed.
|
|
239
|
-
is_dir: Determines whether the input path represents a directory or a file.
|
|
240
|
-
recursive: If True and is_dir is True, recursively deletes all contents of the directory
|
|
241
|
-
before removing it. If False, only removes empty directories (standard rmdir behavior).
|
|
242
|
-
"""
|
|
243
|
-
def _recursive_remove(self, sftp: paramiko.SFTPClient, remote_path: Path) -> None:
|
|
244
|
-
"""Recursively removes a directory and all its contents.
|
|
245
|
-
|
|
246
|
-
This worker method is used by the user-facing remove() method to recursively remove non-empty directories.
|
|
247
|
-
|
|
248
|
-
Args:
|
|
249
|
-
sftp: The SFTP client instance to use for remove operations.
|
|
250
|
-
remote_path: The path to the remote directory to recursively remove.
|
|
251
|
-
"""
|
|
252
|
-
def create_directory(self, remote_path: Path, parents: bool = True) -> None:
|
|
253
|
-
"""Creates the specified directory tree on the managed remote server via SFTP.
|
|
254
|
-
|
|
255
|
-
This method creates directories on the remote server, with options to create parent directories and handle
|
|
256
|
-
existing directories gracefully.
|
|
257
|
-
|
|
258
|
-
Args:
|
|
259
|
-
remote_path: The absolute path to the directory to create on the remote server, relative to the server
|
|
260
|
-
root.
|
|
261
|
-
parents: Determines whether to create parent directories, if they are missing. Otherwise, if parents do not
|
|
262
|
-
exist, raises a FileNotFoundError.
|
|
263
|
-
|
|
264
|
-
Notes:
|
|
265
|
-
This method silently assumes that it is fine if the directory already exists and treats it as a successful
|
|
266
|
-
runtime end-point.
|
|
267
|
-
"""
|
|
268
|
-
def exists(self, remote_path: Path) -> bool:
|
|
269
|
-
"""Returns True if the target file or directory exists on the remote server."""
|
|
270
|
-
def close(self) -> None:
|
|
271
|
-
"""Closes the SSH connection to the server.
|
|
272
|
-
|
|
273
|
-
This method has to be called before destroying the class instance to ensure proper resource cleanup.
|
|
274
|
-
"""
|
|
275
|
-
@property
|
|
276
|
-
def raw_data_root(self) -> Path:
|
|
277
|
-
"""Returns the absolute path to the directory used to store the raw data for all Sun lab projects on the server
|
|
278
|
-
accessible through this class.
|
|
279
|
-
"""
|
|
280
|
-
@property
|
|
281
|
-
def processed_data_root(self) -> Path:
|
|
282
|
-
"""Returns the absolute path to the directory used to store the processed data for all Sun lab projects on the
|
|
283
|
-
server accessible through this class.
|
|
284
|
-
"""
|
|
285
|
-
@property
|
|
286
|
-
def user_data_root(self) -> Path:
|
|
287
|
-
"""Returns the absolute path to the directory used to store user-specific data on the server accessible through
|
|
288
|
-
this class."""
|
|
289
|
-
@property
|
|
290
|
-
def user_working_root(self) -> Path:
|
|
291
|
-
"""Returns the absolute path to the user-specific working (fast) directory on the server accessible through
|
|
292
|
-
this class."""
|
|
293
|
-
@property
|
|
294
|
-
def host(self) -> str:
|
|
295
|
-
"""Returns the hostname or IP address of the server accessible through this class."""
|
|
296
|
-
@property
|
|
297
|
-
def user(self) -> str:
|
|
298
|
-
"""Returns the username used to authenticate with the server."""
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
from .transfer_tools import transfer_directory as transfer_directory
|
|
2
|
-
from .ascension_tools import ascend_tyche_data as ascend_tyche_data
|
|
3
|
-
from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
|
|
4
|
-
from .project_management_tools import (
|
|
5
|
-
ProjectManifest as ProjectManifest,
|
|
6
|
-
resolve_p53_marker as resolve_p53_marker,
|
|
7
|
-
verify_session_checksum as verify_session_checksum,
|
|
8
|
-
generate_project_manifest as generate_project_manifest,
|
|
9
|
-
)
|
|
10
|
-
|
|
11
|
-
__all__ = [
|
|
12
|
-
"ProjectManifest",
|
|
13
|
-
"transfer_directory",
|
|
14
|
-
"calculate_directory_checksum",
|
|
15
|
-
"ascend_tyche_data",
|
|
16
|
-
"verify_session_checksum",
|
|
17
|
-
"generate_project_manifest",
|
|
18
|
-
"resolve_p53_marker",
|
|
19
|
-
]
|