sl-shared-assets 1.0.0rc19__py3-none-any.whl → 1.0.0rc21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of sl-shared-assets has been flagged as a potentially problematic release.
- sl_shared_assets/__init__.py +27 -27
- sl_shared_assets/__init__.pyi +73 -0
- sl_shared_assets/cli.py +266 -40
- sl_shared_assets/cli.pyi +87 -0
- sl_shared_assets/data_classes/__init__.py +23 -20
- sl_shared_assets/data_classes/__init__.pyi +61 -0
- sl_shared_assets/data_classes/configuration_data.py +407 -26
- sl_shared_assets/data_classes/configuration_data.pyi +194 -0
- sl_shared_assets/data_classes/runtime_data.py +59 -41
- sl_shared_assets/data_classes/runtime_data.pyi +145 -0
- sl_shared_assets/data_classes/session_data.py +168 -914
- sl_shared_assets/data_classes/session_data.pyi +249 -0
- sl_shared_assets/data_classes/surgery_data.py +3 -3
- sl_shared_assets/data_classes/surgery_data.pyi +89 -0
- sl_shared_assets/server/__init__.pyi +8 -0
- sl_shared_assets/server/job.pyi +94 -0
- sl_shared_assets/server/server.pyi +95 -0
- sl_shared_assets/tools/__init__.py +8 -1
- sl_shared_assets/tools/__init__.pyi +15 -0
- sl_shared_assets/tools/ascension_tools.py +27 -26
- sl_shared_assets/tools/ascension_tools.pyi +68 -0
- sl_shared_assets/tools/packaging_tools.py +14 -1
- sl_shared_assets/tools/packaging_tools.pyi +56 -0
- sl_shared_assets/tools/project_management_tools.py +164 -0
- sl_shared_assets/tools/project_management_tools.pyi +48 -0
- sl_shared_assets/tools/transfer_tools.pyi +53 -0
- {sl_shared_assets-1.0.0rc19.dist-info → sl_shared_assets-1.0.0rc21.dist-info}/METADATA +21 -4
- sl_shared_assets-1.0.0rc21.dist-info/RECORD +36 -0
- sl_shared_assets-1.0.0rc21.dist-info/entry_points.txt +8 -0
- sl_shared_assets/suite2p/__init__.py +0 -8
- sl_shared_assets/suite2p/multi_day.py +0 -225
- sl_shared_assets/suite2p/single_day.py +0 -563
- sl_shared_assets-1.0.0rc19.dist-info/RECORD +0 -23
- sl_shared_assets-1.0.0rc19.dist-info/entry_points.txt +0 -4
- {sl_shared_assets-1.0.0rc19.dist-info → sl_shared_assets-1.0.0rc21.dist-info}/WHEEL +0 -0
- {sl_shared_assets-1.0.0rc19.dist-info → sl_shared_assets-1.0.0rc21.dist-info}/licenses/LICENSE +0 -0
--- /dev/null
+++ sl_shared_assets/data_classes/session_data.pyi
@@ -0,0 +1,249 @@
+from pathlib import Path
+from dataclasses import field, dataclass
+
+from _typeshed import Incomplete
+from ataraxis_data_structures import YamlConfig
+
+from .configuration_data import get_system_configuration_data as get_system_configuration_data
+
+_valid_session_types: Incomplete
+
+@dataclass()
+class ProjectConfiguration(YamlConfig):
+    """Stores the project-specific configuration parameters that do not change between different animals and runtime
+    sessions.
+
+    An instance of this class is generated and saved as a .yaml file in the 'configuration' directory of each project
+    when it is created. After that, the stored data is reused for every runtime (training or experiment session) carried
+    out for each animal of the project. Additionally, a copy of the most recent configuration file is saved inside each
+    runtime session's 'raw_data' folder, providing seamless integration between the managed data and various Sun lab
+    (sl-) libraries.
+
+    Notes:
+        Together with SessionData, this class forms the entry point for all interactions with the data acquired in the
+        Sun lab. The fields of this class are used to flexibly configure the runtime behavior of major data acquisition
+        (sl-experiment) and processing (sl-forgery) libraries, adapting them for any project in the lab.
+    """
+
+    project_name: str = ...
+    surgery_sheet_id: str = ...
+    water_log_sheet_id: str = ...
+    @classmethod
+    def load(cls, configuration_path: Path) -> ProjectConfiguration:
+        """Loads the project configuration parameters from the specified project_configuration.yaml file.
+
+        This method is called during each interaction with any runtime session's data, including the creation of a new
+        session.
+
+        Args:
+            configuration_path: The path to the project_configuration.yaml file from which to load the data.
+
+        Returns:
+            The initialized ProjectConfiguration instance that stores the configuration data for the target project.
+
+        Raises:
+            FileNotFoundError: If the specified configuration file does not exist or is not a valid YAML file.
+        """
+    def save(self, path: Path) -> None:
+        """Saves class instance data to disk as a project_configuration.yaml file.
+
+        This method is automatically called from the 'sl_experiment' library when a new project is created. After this
+        method's runtime, all future project initialization calls will use the load() method to reuse configuration data
+        saved to the .yaml file created by this method.
+
+        Args:
+            path: The path to the .yaml file to save the data to.
+        """
+    def _verify_data(self) -> None:
+        """Verifies the user-modified data loaded from the project_configuration.yaml file.
+
+        Since this class is explicitly designed to be modified by the user, this verification step is carried out to
+        ensure that the loaded data matches expectations. This reduces the potential for user errors to impact the
+        runtime behavior of the libraries using this class. This internal method is automatically called by the load()
+        method.
+
+        Raises:
+            ValueError: If the loaded data does not match expected formats or values.
+        """
+
+@dataclass()
+class RawData:
+    """Stores the paths to the directories and files that make up the 'raw_data' session-specific directory.
+
+    The raw_data directory stores the data acquired during the session runtime before and after preprocessing. Since
+    preprocessing does not alter the data, any data in that folder is considered 'raw'.
+
+    Notes:
+        The Sun lab data management strategy primarily relies on keeping multiple redundant copies of the raw_data for
+        each acquired session. Typically, one copy is stored on the lab's processing server and the other is stored on
+        the NAS.
+    """
+
+    raw_data_path: Path = ...
+    camera_data_path: Path = ...
+    mesoscope_data_path: Path = ...
+    behavior_data_path: Path = ...
+    zaber_positions_path: Path = ...
+    session_descriptor_path: Path = ...
+    hardware_state_path: Path = ...
+    surgery_metadata_path: Path = ...
+    project_configuration_path: Path = ...
+    session_data_path: Path = ...
+    experiment_configuration_path: Path = ...
+    mesoscope_positions_path: Path = ...
+    window_screenshot_path: Path = ...
+    system_configuration_path: Path = ...
+    checksum_path: Path = ...
+    telomere_path: Path = ...
+    ubiquitin_path: Path = ...
+    def resolve_paths(self, root_directory_path: Path) -> None:
+        """Resolves all paths managed by the class instance based on the input root directory path.
+
+        This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
+        machine that instantiates the class.
+
+        Args:
+            root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
+                hierarchy, this has to point to a directory under the main /session, /animal, or /project directory of
+                the managed session.
+        """
+    def make_directories(self) -> None:
+        """Ensures that all major subdirectories and the root directory exist, creating any missing directories."""
+
+@dataclass()
+class ProcessedData:
+    """Stores the paths to the directories and files that make up the 'processed_data' session-specific directory.
+
+    The processed_data directory stores the data generated by various processing pipelines from the raw data (contents
+    of the raw_data directory). Processed data represents an intermediate step between raw data and the dataset used in
+    the data analysis, but is not itself designed to be analyzed.
+    """
+
+    processed_data_path: Path = ...
+    camera_data_path: Path = ...
+    mesoscope_data_path: Path = ...
+    behavior_data_path: Path = ...
+    job_logs_path: Path = ...
+    single_day_suite2p_bin_path: Path = ...
+    multi_day_suite2p_bin_path: Path = ...
+    behavior_bin_path: Path = ...
+    dlc_bin_path: Path = ...
+    def resolve_paths(self, root_directory_path: Path) -> None:
+        """Resolves all paths managed by the class instance based on the input root directory path.
+
+        This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
+        machine that instantiates the class.
+
+        Args:
+            root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
+                hierarchy, this has to point to a directory under the main /session, /animal, or /project directory of
+                the managed session.
+        """
+    def make_directories(self) -> None:
+        """Ensures that all major subdirectories and the root directory exist, creating any missing directories."""
+
+@dataclass
+class SessionData(YamlConfig):
+    """Stores and manages the data layout of a single training or experiment session acquired in the Sun lab.
+
+    The primary purpose of this class is to maintain the session data structure across all supported destinations and
+    during all processing stages. It generates the paths used by all other classes from all Sun lab libraries that
+    interact with the session's data from the point of its creation and until the data is integrated into an
+    analysis dataset.
+
+    When necessary, the class can be used to either generate a new session or load the layout of an already existing
+    session. When the class is used to create a new session, it generates the new session's name using the current
+    UTC timestamp, accurate to microseconds. This ensures that each session name is unique and preserves the overall
+    session order.
+
+    Notes:
+        This class is specifically designed for working with the data from a single session, performed by a single
+        animal under the specific experiment. The class is used to manage both raw and processed data. It follows the
+        data through the acquisition, preprocessing, and processing stages of the Sun lab data workflow. Together with
+        the ProjectConfiguration class, this class serves as an entry point for all interactions with the managed
+        session's data.
+    """
+
+    project_name: str
+    animal_id: str
+    session_name: str
+    session_type: str
+    acquisition_system: str
+    experiment_name: str | None
+    raw_data: RawData = field(default_factory=Incomplete)
+    processed_data: ProcessedData = field(default_factory=Incomplete)
+    @classmethod
+    def create(
+        cls,
+        project_name: str,
+        animal_id: str,
+        session_type: str,
+        experiment_name: str | None = None,
+        session_name: str | None = None,
+    ) -> SessionData:
+        """Creates a new SessionData object and generates the new session's data structure on the local PC.
+
+        This method is intended to be called exclusively by the sl-experiment library to create new training or
+        experiment sessions and generate the session data directory tree.
+
+        Notes:
+            To load an already existing session data structure, use the load() method instead.
+
+            This method automatically dumps the data of the created SessionData instance into the session_data.yaml file
+            inside the root raw_data directory of the created hierarchy. It also finds and dumps other configuration
+            files, such as project_configuration.yaml, experiment_configuration.yaml, and system_configuration.yaml into
+            the same raw_data directory. This ensures that if the session's runtime is interrupted unexpectedly, the
+            acquired data can still be processed.
+
+        Args:
+            project_name: The name of the project for which the data is acquired.
+            animal_id: The ID code of the animal for which the data is acquired.
+            session_type: The type of the session. Primarily, this determines how to read the session_descriptor.yaml
+                file. Valid options are 'Lick training', 'Run training', 'Window checking', or 'Experiment'.
+            experiment_name: The name of the experiment executed during the managed session. This optional argument is
+                only used for 'Experiment' session types. It is used to find the experiment configuration .YAML file.
+            session_name: An optional session_name override. Generally, this argument should not be provided for most
+                sessions. When provided, the method uses this name instead of generating a new timestamp-based name.
+                This is only used during the 'ascension' runtime to convert old data structures to the modern
+                lab standards.
+
+        Returns:
+            An initialized SessionData instance that stores the layout of the newly created session's data.
+        """
+    @classmethod
+    def load(
+        cls, session_path: Path, processed_data_root: Path | None = None, make_processed_data_directory: bool = False
+    ) -> SessionData:
+        """Loads the SessionData instance from the target session's session_data.yaml file.
+
+        This method is used to load the data layout information of an already existing session. Primarily, this is used
+        when preprocessing or processing session data. Due to how SessionData is stored and used in the lab, this
+        method always loads the data layout from the session_data.yaml file stored inside the raw_data session
+        subfolder. Currently, all interactions with Sun lab data require access to the 'raw_data' folder.
+
+        Notes:
+            To create a new session, use the create() method instead.
+
+        Args:
+            session_path: The path to the root directory of an existing session, e.g.: root/project/animal/session.
+            processed_data_root: If processed data is kept on a drive different from the one that stores raw data,
+                provide the path to the root project directory (directory that stores all Sun lab projects) on that
+                drive. The method will automatically resolve the project/animal/session/processed_data hierarchy using
+                this root path. If raw and processed data are kept on the same drive, keep this set to None.
+            make_processed_data_directory: Determines whether this method should create the processed_data directory if
+                it does not exist.
+
+        Returns:
+            An initialized SessionData instance for the session whose data is stored at the provided path.
+
+        Raises:
+            FileNotFoundError: If the 'session_data.yaml' file is not found under the session_path/raw_data/ subfolder.
+
+        """
+    def _save(self) -> None:
+        """Saves the instance data to the 'raw_data' directory of the managed session as a 'session_data.yaml' file.
+
+        This is used to save the data stored in the instance to disk, so that it can be reused during preprocessing or
+        data processing. The method is intended to only be used by the SessionData instance itself during its
+        create() method runtime.
+        """
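To make the new session_data.pyi API concrete, here is a minimal usage sketch. It is not part of the diff: the top-level import location, every path, and the animal ID are assumptions made for illustration, and the calls simply follow the stub signatures shown above.

from pathlib import Path

# Assumed re-export from the package root; per this diff, the classes are
# defined in sl_shared_assets.data_classes.session_data.
from sl_shared_assets import ProjectConfiguration, SessionData

# Load the user-editable project configuration (hypothetical path).
config = ProjectConfiguration.load(
    configuration_path=Path("/data/demo_project/configuration/project_configuration.yaml")
)

# Create a new session; the session name is derived from the current UTC timestamp.
session = SessionData.create(
    project_name=config.project_name,
    animal_id="m001",  # hypothetical animal ID
    session_type="Lick training",  # one of the valid options listed in the stub
)

# Or load the layout of an already acquired session from its session_data.yaml file.
existing = SessionData.load(session_path=Path("/data/demo_project/m001/some_session"))
print(existing.raw_data.raw_data_path)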
--- sl_shared_assets/data_classes/surgery_data.py
+++ sl_shared_assets/data_classes/surgery_data.py
@@ -1,5 +1,5 @@
 """This module provides classes to store animal surgery data. This is used to store the data extracted from the Sun lab
-surgery log, so that subject surgery data is always kept together with training and experiment data."""
+surgery log, so that subject (animal) surgery data is always kept together with training and experiment data."""
 
 from dataclasses import dataclass
 
@@ -130,10 +130,10 @@ class DrugData:
 
 @dataclass
 class SurgeryData(YamlConfig):
-    """Stores the data about a single
+    """Stores the data about a single animal surgical intervention.
 
     This class aggregates other dataclass instances that store specific data about the surgical procedure. Primarily, it
-    is used to save the data as a .yaml file to every session's raw_data directory of each animal used in every lab
+    is used to save the data as a .yaml file to every session's 'raw_data' directory of each animal used in every lab
     project. This way, the surgery data is always stored alongside the behavior and brain activity data collected
     during the session.
     """
--- /dev/null
+++ sl_shared_assets/data_classes/surgery_data.pyi
@@ -0,0 +1,89 @@
+from dataclasses import dataclass
+
+from ataraxis_data_structures import YamlConfig
+
+@dataclass()
+class SubjectData:
+    """Stores the ID information of the surgical intervention's subject (animal)."""
+
+    id: int
+    ear_punch: str
+    sex: str
+    genotype: str
+    date_of_birth_us: int
+    weight_g: float
+    cage: int
+    location_housed: str
+    status: str
+
+@dataclass()
+class ProcedureData:
+    """Stores the general information about the surgical intervention."""
+
+    surgery_start_us: int
+    surgery_end_us: int
+    surgeon: str
+    protocol: str
+    surgery_notes: str
+    post_op_notes: str
+    surgery_quality: int = ...
+
+@dataclass
+class ImplantData:
+    """Stores the information about a single implantation performed during the surgical intervention.
+
+    Multiple ImplantData instances are used at the same time if the surgery involved multiple implants.
+    """
+
+    implant: str
+    implant_target: str
+    implant_code: int
+    implant_ap_coordinate_mm: float
+    implant_ml_coordinate_mm: float
+    implant_dv_coordinate_mm: float
+
+@dataclass
+class InjectionData:
+    """Stores the information about a single injection performed during surgical intervention.
+
+    Multiple InjectionData instances are used at the same time if the surgery involved multiple injections.
+    """
+
+    injection: str
+    injection_target: str
+    injection_volume_nl: float
+    injection_code: int
+    injection_ap_coordinate_mm: float
+    injection_ml_coordinate_mm: float
+    injection_dv_coordinate_mm: float
+
+@dataclass
+class DrugData:
+    """Stores the information about all drugs administered to the subject before, during, and immediately after the
+    surgical intervention.
+    """
+
+    lactated_ringers_solution_volume_ml: float
+    lactated_ringers_solution_code: int
+    ketoprofen_volume_ml: float
+    ketoprofen_code: int
+    buprenorphine_volume_ml: float
+    buprenorphine_code: int
+    dexamethasone_volume_ml: float
+    dexamethasone_code: int
+
+@dataclass
+class SurgeryData(YamlConfig):
+    """Stores the data about a single animal surgical intervention.
+
+    This class aggregates other dataclass instances that store specific data about the surgical procedure. Primarily, it
+    is used to save the data as a .yaml file to every session's 'raw_data' directory of each animal used in every lab
+    project. This way, the surgery data is always stored alongside the behavior and brain activity data collected
+    during the session.
+    """
+
+    subject: SubjectData
+    procedure: ProcedureData
+    drugs: DrugData
+    implants: list[ImplantData]
+    injections: list[InjectionData]
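As a rough illustration of how these dataclasses compose, the sketch below assembles one record; every field value is invented. Per the stub, SurgeryData inherits its YAML (de)serialization from YamlConfig, but the exact save/load method names are not shown in this diff, so the sketch stops at construction.

from sl_shared_assets.data_classes.surgery_data import (
    DrugData, ImplantData, InjectionData, ProcedureData, SubjectData, SurgeryData,
)

# All values below are placeholders; timestamps are microseconds since the Unix epoch.
subject = SubjectData(
    id=101, ear_punch="right", sex="F", genotype="wt", date_of_birth_us=1_700_000_000_000_000,
    weight_g=22.5, cage=7, location_housed="vivarium 2", status="alive",
)
procedure = ProcedureData(  # surgery_quality has a default and can be omitted
    surgery_start_us=1_705_000_000_000_000, surgery_end_us=1_705_007_200_000_000,
    surgeon="A. Surgeon", protocol="IACUC-0000", surgery_notes="cranial window implant",
    post_op_notes="recovered well",
)
implant = ImplantData(
    implant="cranial window", implant_target="V1", implant_code=1,
    implant_ap_coordinate_mm=-3.0, implant_ml_coordinate_mm=2.5, implant_dv_coordinate_mm=0.0,
)
injection = InjectionData(
    injection="AAV", injection_target="V1", injection_volume_nl=100.0, injection_code=2,
    injection_ap_coordinate_mm=-3.0, injection_ml_coordinate_mm=2.5, injection_dv_coordinate_mm=-0.3,
)
drugs = DrugData(
    lactated_ringers_solution_volume_ml=0.5, lactated_ringers_solution_code=3,
    ketoprofen_volume_ml=0.1, ketoprofen_code=4, buprenorphine_volume_ml=0.05,
    buprenorphine_code=5, dexamethasone_volume_ml=0.05, dexamethasone_code=6,
)
surgery = SurgeryData(subject=subject, procedure=procedure, drugs=drugs, implants=[implant], injections=[injection])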
--- /dev/null
+++ sl_shared_assets/server/job.pyi
@@ -0,0 +1,94 @@
+from pathlib import Path
+
+from _typeshed import Incomplete
+from simple_slurm import Slurm
+
+class Job:
+    """Aggregates the data of a single SLURM-managed job to be executed on the Sun lab BioHPC cluster.
+
+    This class provides the API for constructing any server-side job in the Sun lab. Internally, it wraps an instance
+    of a Slurm class to package the job data into the format expected by the SLURM job manager. All jobs managed by this
+    class instance should be submitted to an initialized Server class 'submit_job' method to be executed on the server.
+
+    Notes:
+        The initialization method of the class contains the arguments for configuring the SLURM and Conda environments
+        used by the job. Do not submit additional SLURM or Conda commands via the 'add_command' method, as this may
+        produce unexpected behavior.
+
+        Each job can be conceptualized as a sequence of shell instructions to execute on the remote compute server. For
+        the lab, that means that the bulk of the command consists of calling various CLIs exposed by data processing or
+        analysis pipelines, installed in the Conda environment on the server. Other than that, the job contains commands
+        for activating the target conda environment and, in some cases, doing other preparatory or cleanup work. The
+        source code of a 'remote' job is typically identical to what a human operator would type in a 'local' terminal
+        to run the same job on their PC.
+
+        A key feature of server-side jobs is that they are executed on virtual machines managed by SLURM. Since the
+        server has a lot more compute and memory resources than likely needed by individual jobs, each job typically
+        requests a subset of these resources. Upon being executed, SLURM creates an isolated environment with the
+        requested resources and runs the job in that environment.
+
+        Since all jobs are expected to use the CLIs from python packages (pre)installed on the BioHPC server, make sure
+        that the target environment is installed and configured before submitting jobs to the server. See the notes in
+        the ReadMe to learn more about configuring server-side conda environments.
+
+    Args:
+        job_name: The descriptive name of the SLURM job to be created. Primarily, this name is used in terminal
+            printouts to identify the job to human operators.
+        output_log: The absolute path to the .txt file on the processing server, where to store the standard output
+            data of the job.
+        error_log: The absolute path to the .txt file on the processing server, where to store the standard error
+            data of the job.
+        working_directory: The absolute path to the directory where temporary job files will be stored. During runtime,
+            classes from this library use that directory to store files such as the job's shell script. All such files
+            are automatically removed from the directory at the end of a non-error runtime.
+        conda_environment: The name of the conda environment to activate on the server before running the job logic. The
+            environment should contain the necessary Python packages and CLIs to support running the job's logic.
+        cpus_to_use: The number of CPUs to use for the job.
+        ram_gb: The amount of RAM to allocate for the job, in Gigabytes.
+        time_limit: The maximum time limit for the job, in minutes. If the job is still running at the end of this time
+            period, it will be forcibly terminated. It is highly advised to always set adequate maximum runtime limits
+            to prevent jobs from hogging the server in case of runtime or algorithm errors.
+
+    Attributes:
+        remote_script_path: Stores the path to the script file relative to the root of the remote server that runs the
+            command.
+        job_id: Stores the unique job identifier assigned by the SLURM manager to this job when it is accepted for
+            execution. This field is initialized to None and is overwritten by the Server class that submits the job.
+        job_name: Stores the descriptive name of the SLURM job.
+        _command: Stores the managed SLURM command object.
+    """
+
+    remote_script_path: Incomplete
+    job_id: str | None
+    job_name: str
+    _command: Slurm
+    def __init__(
+        self,
+        job_name: str,
+        output_log: Path,
+        error_log: Path,
+        working_directory: Path,
+        conda_environment: str,
+        cpus_to_use: int = 10,
+        ram_gb: int = 10,
+        time_limit: int = 60,
+    ) -> None: ...
+    def __repr__(self) -> str:
+        """Returns the string representation of the Job instance."""
+    def add_command(self, command: str) -> None:
+        """Adds the input command string to the end of the managed SLURM job command list.
+
+        This method is a wrapper around simple_slurm's 'add_cmd' method. It is used to iteratively build the shell
+        command sequence of the job.
+
+        Args:
+            command: The command string to add to the command list, e.g.: 'python main.py --input 1'.
+        """
+    @property
+    def command_script(self) -> str:
+        """Translates the managed job data into a shell-script-writable string and returns it to the caller.
+
+        This method is used by the Server class to translate the job into the format that can be submitted to and
+        executed on the remote compute server. Do not call this method manually unless you know what you are doing.
+        The returned string is safe to dump into a .sh (shell script) file and move to the BioHPC server for execution.
+        """
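The Job stub above fully specifies the constructor, so a hedged sketch of building a job is possible; the CLI command and all paths below are hypothetical.

from pathlib import Path

from sl_shared_assets.server.job import Job  # import path inferred from the stub layout

job = Job(
    job_name="single_day_suite2p",
    output_log=Path("/workdir/logs/suite2p_out.txt"),
    error_log=Path("/workdir/logs/suite2p_err.txt"),
    working_directory=Path("/workdir"),  # temporary job files (e.g. the shell script) live here
    conda_environment="suite2p_env",  # must already exist on the server
    cpus_to_use=16,
    ram_gb=64,
    time_limit=240,  # minutes; the job is forcibly terminated past this limit
)

# Build the job body one shell command at a time; do not add SLURM or Conda setup
# commands here, as the constructor already configures both (see the Notes above).
job.add_command("sl-suite2p --session /server/projects/demo/m001/session_01")  # hypothetical CLI

# Inspect the generated shell script without submitting anything.
print(job.command_script)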
--- /dev/null
+++ sl_shared_assets/server/server.pyi
@@ -0,0 +1,95 @@
+from pathlib import Path
+from dataclasses import dataclass
+
+from simple_slurm import Slurm as Slurm
+from paramiko.client import SSHClient as SSHClient
+from ataraxis_data_structures import YamlConfig
+
+from .job import Job as Job
+
+def generate_server_credentials(
+    output_directory: Path, username: str, password: str, host: str = "cbsuwsun.biohpc.cornell.edu"
+) -> None:
+    """Generates a new server_credentials.yaml file under the specified directory, using the input information.
+
+    This function provides a convenience interface for generating new BioHPC server credential files. Generally, this is
+    only used when setting up new host-computers in the lab.
+    """
+
+@dataclass()
+class ServerCredentials(YamlConfig):
+    """This class stores the hostname and credentials used to log into the BioHPC cluster to run Sun lab processing
+    pipelines.
+
+    Primarily, this is used as part of the sl-experiment library runtime to start data processing once it is
+    transferred to the BioHPC server during preprocessing. However, the same file can be used together with the Server
+    class API to run any computation jobs on the lab's BioHPC server.
+    """
+
+    username: str = ...
+    password: str = ...
+    host: str = ...
+
+class Server:
+    """Encapsulates access to the Sun lab BioHPC processing server.
+
+    This class provides the API that allows accessing the BioHPC server to create and submit various SLURM-managed jobs
+    to the server. It functions as the central interface used by all processing pipelines in the lab to execute costly
+    data processing on the server.
+
+    Notes:
+        All lab processing pipelines expect the data to be stored on the server and all processing logic to be packaged
+        and installed into dedicated conda environments on the server.
+
+        This class assumes that the target server has the SLURM job manager installed and accessible to the user whose
+        credentials are used to connect to the server as part of this class instantiation.
+
+    Args:
+        credentials_path: The path to the locally stored .yaml file that contains the server hostname and access
+            credentials.
+
+    Attributes:
+        _open: Tracks whether the connection to the server is open or not.
+        _client: Stores the initialized SSHClient instance used to interface with the server.
+    """
+
+    _open: bool
+    _credentials: ServerCredentials
+    _client: SSHClient
+    def __init__(self, credentials_path: Path) -> None: ...
+    def __del__(self) -> None:
+        """If the instance is connected to the server, terminates the connection before the instance is destroyed."""
+    def submit_job(self, job: Job) -> Job:
+        """Submits the input job to the managed BioHPC server via the SLURM job manager.
+
+        This method submits various jobs for execution via the SLURM-managed BioHPC cluster. As part of its runtime, the
+        method translates the Job object into a shell script, moves the script to the target working directory on
+        the server, and instructs the server to execute the shell script (via SLURM).
+
+        Args:
+            job: The Job object that contains all job data.
+
+        Returns:
+            The job object whose 'job_id' attribute has been modified with the job ID, if the job was successfully
+            submitted.
+
+        Raises:
+            RuntimeError: If job submission to the server fails.
+        """
+    def job_complete(self, job: Job) -> bool:
+        """Returns True if the job managed by the input Job instance has been completed or terminated its runtime due
+        to an error.
+
+        If the job is still running or is waiting inside the execution queue, returns False.
+
+        Args:
+            job: The Job object whose status needs to be checked.
+
+        Raises:
+            ValueError: If the input Job object does not contain a valid job_id, suggesting that it has not been
+                submitted to the server.
+        """
+    def close(self) -> None:
+        """Closes the SSH connection to the server.
+
+        This method has to be called before destroying the class instance to ensure proper resource cleanup.
+        """
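Putting the two server-side stubs together, a full submit-and-poll round trip could look like the sketch below. It only uses calls declared in the stubs above; the credentials directory, usernames, paths, and environment name are placeholders, and the host argument is left at its documented BioHPC default.

import time
from pathlib import Path

from sl_shared_assets.server.job import Job  # import paths inferred from the stub layout
from sl_shared_assets.server.server import Server, generate_server_credentials

# One-time setup on a new host: write a server_credentials.yaml file (placeholder values).
credentials_directory = Path.home() / ".sl_credentials"
generate_server_credentials(
    output_directory=credentials_directory,
    username="labuser",
    password="not-a-real-password",
)

# Build a trivial job (see the Job stub above for the full argument list).
job = Job(
    job_name="smoke_test",
    output_log=Path("/workdir/logs/out.txt"),
    error_log=Path("/workdir/logs/err.txt"),
    working_directory=Path("/workdir"),
    conda_environment="sl_env",  # hypothetical pre-installed environment name
    time_limit=10,
)
job.add_command("echo 'hello from SLURM'")

# Connect, submit, and poll until SLURM reports the job finished (or errored out).
server = Server(credentials_path=credentials_directory / "server_credentials.yaml")
submitted = server.submit_job(job)  # populates job_id on success
print(f"Submitted as SLURM job {submitted.job_id}")
while not server.job_complete(submitted):
    time.sleep(30)  # job_complete() also returns True if the job terminated with an error
server.close()  # close the SSH connection explicitly to ensure proper cleanup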
--- sl_shared_assets/tools/__init__.py
+++ sl_shared_assets/tools/__init__.py
@@ -4,5 +4,12 @@ integrity of the data. The tools from this package are used by most other data p
 from .transfer_tools import transfer_directory
 from .ascension_tools import ascend_tyche_data
 from .packaging_tools import calculate_directory_checksum
+from .project_management_tools import verify_session_checksum, generate_project_manifest
 
-__all__ = [
+__all__ = [
+    "transfer_directory",
+    "calculate_directory_checksum",
+    "ascend_tyche_data",
+    "verify_session_checksum",
+    "generate_project_manifest",
+]
--- /dev/null
+++ sl_shared_assets/tools/__init__.pyi
@@ -0,0 +1,15 @@
+from .transfer_tools import transfer_directory as transfer_directory
+from .ascension_tools import ascend_tyche_data as ascend_tyche_data
+from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
+from .project_management_tools import (
+    verify_session_checksum as verify_session_checksum,
+    generate_project_manifest as generate_project_manifest,
+)
+
+__all__ = [
+    "transfer_directory",
+    "calculate_directory_checksum",
+    "ascend_tyche_data",
+    "verify_session_checksum",
+    "generate_project_manifest",
+]
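After this change, all five tools are re-exported at the package level, so downstream code can import them directly, as in the sketch below. The signatures of the two new functions live in project_management_tools.pyi (added in this release with +48 lines) and are not shown in this excerpt.

# The public API surface of sl_shared_assets.tools as of 1.0.0rc21.
from sl_shared_assets.tools import (
    ascend_tyche_data,
    calculate_directory_checksum,
    generate_project_manifest,
    transfer_directory,
    verify_session_checksum,
)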