sl-shared-assets 1.0.0 (py3-none-any.whl)


Files changed (36)
  1. sl_shared_assets/__init__.py +80 -0
  2. sl_shared_assets/__init__.pyi +73 -0
  3. sl_shared_assets/cli.py +384 -0
  4. sl_shared_assets/cli.pyi +94 -0
  5. sl_shared_assets/data_classes/__init__.py +66 -0
  6. sl_shared_assets/data_classes/__init__.pyi +61 -0
  7. sl_shared_assets/data_classes/configuration_data.py +479 -0
  8. sl_shared_assets/data_classes/configuration_data.pyi +199 -0
  9. sl_shared_assets/data_classes/runtime_data.py +251 -0
  10. sl_shared_assets/data_classes/runtime_data.pyi +145 -0
  11. sl_shared_assets/data_classes/session_data.py +625 -0
  12. sl_shared_assets/data_classes/session_data.pyi +252 -0
  13. sl_shared_assets/data_classes/surgery_data.py +152 -0
  14. sl_shared_assets/data_classes/surgery_data.pyi +89 -0
  15. sl_shared_assets/py.typed +0 -0
  16. sl_shared_assets/server/__init__.py +8 -0
  17. sl_shared_assets/server/__init__.pyi +8 -0
  18. sl_shared_assets/server/job.py +140 -0
  19. sl_shared_assets/server/job.pyi +94 -0
  20. sl_shared_assets/server/server.py +214 -0
  21. sl_shared_assets/server/server.pyi +95 -0
  22. sl_shared_assets/tools/__init__.py +15 -0
  23. sl_shared_assets/tools/__init__.pyi +15 -0
  24. sl_shared_assets/tools/ascension_tools.py +277 -0
  25. sl_shared_assets/tools/ascension_tools.pyi +68 -0
  26. sl_shared_assets/tools/packaging_tools.py +148 -0
  27. sl_shared_assets/tools/packaging_tools.pyi +56 -0
  28. sl_shared_assets/tools/project_management_tools.py +201 -0
  29. sl_shared_assets/tools/project_management_tools.pyi +54 -0
  30. sl_shared_assets/tools/transfer_tools.py +119 -0
  31. sl_shared_assets/tools/transfer_tools.pyi +53 -0
  32. sl_shared_assets-1.0.0.dist-info/METADATA +869 -0
  33. sl_shared_assets-1.0.0.dist-info/RECORD +36 -0
  34. sl_shared_assets-1.0.0.dist-info/WHEEL +4 -0
  35. sl_shared_assets-1.0.0.dist-info/entry_points.txt +8 -0
  36. sl_shared_assets-1.0.0.dist-info/licenses/LICENSE +674 -0
sl_shared_assets/data_classes/session_data.pyi
@@ -0,0 +1,252 @@
+ from pathlib import Path
+ from dataclasses import field, dataclass
+
+ from _typeshed import Incomplete
+ from ataraxis_data_structures import YamlConfig
+
+ from .configuration_data import get_system_configuration_data as get_system_configuration_data
+
+ _valid_session_types: Incomplete
+
+ @dataclass()
+ class ProjectConfiguration(YamlConfig):
+     """Stores the project-specific configuration parameters that do not change between different animals and runtime
+     sessions.
+
+     An instance of this class is generated and saved as a .yaml file in the 'configuration' directory of each project
+     when it is created. After that, the stored data is reused for every runtime (training or experiment session)
+     carried out for each animal of the project. Additionally, a copy of the most recent configuration file is saved
+     inside each runtime session's 'raw_data' folder, providing seamless integration between the managed data and
+     various Sun lab (sl-) libraries.
+
+     Notes:
+         Together with SessionData, this class forms the entry point for all interactions with the data acquired in
+         the Sun lab. The fields of this class are used to flexibly configure the runtime behavior of the major data
+         acquisition (sl-experiment) and processing (sl-forgery) libraries, adapting them for any project in the lab.
+     """
+
+     project_name: str = ...
+     surgery_sheet_id: str = ...
+     water_log_sheet_id: str = ...
+     @classmethod
+     def load(cls, configuration_path: Path) -> ProjectConfiguration:
+         """Loads the project configuration parameters from the specified project_configuration.yaml file.
+
+         This method is called during each interaction with any runtime session's data, including the creation of a
+         new session.
+
+         Args:
+             configuration_path: The path to the project_configuration.yaml file from which to load the data.
+
+         Returns:
+             The initialized ProjectConfiguration instance that stores the configuration data for the target project.
+
+         Raises:
+             FileNotFoundError: If the specified configuration file does not exist or is not a valid YAML file.
+         """
+     def save(self, path: Path) -> None:
+         """Saves class instance data to disk as a project_configuration.yaml file.
+
+         This method is automatically called from the 'sl_experiment' library when a new project is created. After
+         this method's runtime, all future project initialization calls will use the load() method to reuse the
+         configuration data saved to the .yaml file created by this method.
+
+         Args:
+             path: The path to the .yaml file to save the data to.
+         """
+     def _verify_data(self) -> None:
+         """Verifies the user-modified data loaded from the project_configuration.yaml file.
+
+         Since this class is explicitly designed to be modified by the user, this verification step is carried out to
+         ensure that the loaded data matches expectations. This reduces the potential for user errors to impact the
+         runtime behavior of the libraries using this class. This internal method is automatically called by the
+         load() method.
+
+         Raises:
+             ValueError: If the loaded data does not match expected formats or values.
+         """
+
+ @dataclass()
+ class RawData:
+     """Stores the paths to the directories and files that make up the 'raw_data' session-specific directory.
+
+     The raw_data directory stores the data acquired during the session runtime, before and after preprocessing. Since
+     preprocessing does not alter the data, any data in that folder is considered 'raw'.
+
+     Notes:
+         The Sun lab data management strategy primarily relies on keeping multiple redundant copies of the raw_data
+         for each acquired session. Typically, one copy is stored on the lab's processing server and the other is
+         stored on the NAS.
+     """
+
+     raw_data_path: Path = ...
+     camera_data_path: Path = ...
+     mesoscope_data_path: Path = ...
+     behavior_data_path: Path = ...
+     zaber_positions_path: Path = ...
+     session_descriptor_path: Path = ...
+     hardware_state_path: Path = ...
+     surgery_metadata_path: Path = ...
+     project_configuration_path: Path = ...
+     session_data_path: Path = ...
+     experiment_configuration_path: Path = ...
+     mesoscope_positions_path: Path = ...
+     window_screenshot_path: Path = ...
+     system_configuration_path: Path = ...
+     checksum_path: Path = ...
+     telomere_path: Path = ...
+     ubiquitin_path: Path = ...
+     verified_bin_path: Path = ...
+     def resolve_paths(self, root_directory_path: Path) -> None:
+         """Resolves all paths managed by the class instance based on the input root directory path.
+
+         This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
+         machine that instantiates the class.
+
+         Args:
+             root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
+                 hierarchy, this has to point to a directory under the main /session, /animal, or /project directory
+                 of the managed session.
+         """
+     def make_directories(self) -> None:
+         """Ensures that all major subdirectories and the root directory exist, creating any missing directories."""
+
+ @dataclass()
+ class ProcessedData:
+     """Stores the paths to the directories and files that make up the 'processed_data' session-specific directory.
+
+     The processed_data directory stores the data generated by various processing pipelines from the raw data (the
+     contents of the raw_data directory). Processed data represents an intermediate step between the raw data and the
+     dataset used in the data analysis, but is not itself designed to be analyzed.
+     """
+
+     processed_data_path: Path = ...
+     camera_data_path: Path = ...
+     mesoscope_data_path: Path = ...
+     behavior_data_path: Path = ...
+     job_logs_path: Path = ...
+     single_day_suite2p_bin_path: Path = ...
+     multi_day_suite2p_bin_path: Path = ...
+     behavior_bin_path: Path = ...
+     dlc_bin_path: Path = ...
+     def resolve_paths(self, root_directory_path: Path) -> None:
+         """Resolves all paths managed by the class instance based on the input root directory path.
+
+         This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
+         machine that instantiates the class.
+
+         Args:
+             root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
+                 hierarchy, this has to point to a directory under the main /session, /animal, or /project directory
+                 of the managed session.
+         """
+     def make_directories(self) -> None:
+         """Ensures that all major subdirectories and the root directory exist, creating any missing directories."""
+
+ @dataclass
+ class SessionData(YamlConfig):
+     """Stores and manages the data layout of a single training or experiment session acquired in the Sun lab.
+
+     The primary purpose of this class is to maintain the session data structure across all supported destinations and
+     during all processing stages. It generates the paths used by all other classes from all Sun lab libraries that
+     interact with the session's data, from the point of its creation until the data is integrated into an
+     analysis dataset.
+
+     When necessary, the class can be used to either generate a new session or load the layout of an already existing
+     session. When the class is used to create a new session, it generates the new session's name using the current
+     UTC timestamp, accurate to microseconds. This ensures that each session name is unique and preserves the overall
+     session order.
+
+     Notes:
+         This class is specifically designed for working with the data from a single session, performed by a single
+         animal under a specific experiment. The class is used to manage both raw and processed data. It follows the
+         data through the acquisition, preprocessing, and processing stages of the Sun lab data workflow. Together
+         with the ProjectConfiguration class, this class serves as an entry point for all interactions with the
+         managed session's data.
+     """
+
+     project_name: str
+     animal_id: str
+     session_name: str
+     session_type: str
+     acquisition_system: str
+     experiment_name: str | None
+     raw_data: RawData = field(default_factory=Incomplete)
+     processed_data: ProcessedData = field(default_factory=Incomplete)
+     def __post_init__(self) -> None:
+         """Ensures raw_data and processed_data are always instances of RawData and ProcessedData."""
+     @classmethod
+     def create(
+         cls,
+         project_name: str,
+         animal_id: str,
+         session_type: str,
+         experiment_name: str | None = None,
+         session_name: str | None = None,
+     ) -> SessionData:
+         """Creates a new SessionData object and generates the new session's data structure on the local PC.
+
+         This method is intended to be called exclusively by the sl-experiment library to create new training or
+         experiment sessions and generate the session data directory tree.
+
+         Notes:
+             To load an already existing session data structure, use the load() method instead.
+
+             This method automatically dumps the data of the created SessionData instance into the session_data.yaml
+             file inside the root raw_data directory of the created hierarchy. It also finds and dumps other
+             configuration files, such as project_configuration.yaml, experiment_configuration.yaml, and
+             system_configuration.yaml, into the same raw_data directory. This ensures that if the session's runtime
+             is interrupted unexpectedly, the acquired data can still be processed.
+
+         Args:
+             project_name: The name of the project for which the data is acquired.
+             animal_id: The ID code of the animal for which the data is acquired.
+             session_type: The type of the session. Primarily, this determines how to read the
+                 session_descriptor.yaml file. Valid options are 'Lick training', 'Run training', 'Window checking',
+                 or 'Experiment'.
+             experiment_name: The name of the experiment executed during the managed session. This optional argument
+                 is only used for 'Experiment' session types. It is used to find the experiment configuration .yaml
+                 file.
+             session_name: An optional session_name override. Generally, this argument should not be provided for
+                 most sessions. When provided, the method uses this name instead of generating a new timestamp-based
+                 name. This is only used during the 'ascension' runtime to convert old data structures to the modern
+                 lab standards.
+
+         Returns:
+             An initialized SessionData instance that stores the layout of the newly created session's data.
+         """
+     @classmethod
+     def load(
+         cls, session_path: Path, processed_data_root: Path | None = None, make_processed_data_directory: bool = False
+     ) -> SessionData:
+         """Loads the SessionData instance from the target session's session_data.yaml file.
+
+         This method is used to load the data layout information of an already existing session. Primarily, this is
+         used when preprocessing or processing session data. Due to how SessionData is stored and used in the lab,
+         this method always loads the data layout from the session_data.yaml file stored inside the raw_data session
+         subfolder. Currently, all interactions with Sun lab data require access to the 'raw_data' folder.
+
+         Notes:
+             To create a new session, use the create() method instead.
+
+         Args:
+             session_path: The path to the root directory of an existing session, e.g.: root/project/animal/session.
+             processed_data_root: If processed data is kept on a drive different from the one that stores raw data,
+                 provide the path to the root project directory (the directory that stores all Sun lab projects) on
+                 that drive. The method will automatically resolve the project/animal/session/processed_data
+                 hierarchy using this root path. If raw and processed data are kept on the same drive, keep this set
+                 to None.
+             make_processed_data_directory: Determines whether this method should create the processed_data directory
+                 if it does not exist.
+
+         Returns:
+             An initialized SessionData instance for the session whose data is stored at the provided path.
+
+         Raises:
+             FileNotFoundError: If the 'session_data.yaml' file is not found under the session_path/raw_data/
+                 subfolder.
+         """
+     def _save(self) -> None:
+         """Saves the instance data to the 'raw_data' directory of the managed session as a 'session_data.yaml' file.
+
+         This is used to save the data stored in the instance to disk, so that it can be reused during preprocessing
+         or data processing. The method is intended to only be used by the SessionData instance itself during its
+         create() method runtime.
+         """
sl_shared_assets/data_classes/surgery_data.py
@@ -0,0 +1,152 @@
+ """This module provides classes to store animal surgery data. This is used to store the data extracted from the Sun lab
+ surgery log, so that subject (animal) surgery data is always kept together with training and experiment data."""
+
+ from dataclasses import dataclass
+
+ from ataraxis_data_structures import YamlConfig
+
+
+ @dataclass()
+ class SubjectData:
+     """Stores the ID information of the surgical intervention's subject (animal)."""
+
+     id: int
+     """Stores the unique ID (name) of the subject. Assumes all animals are given a numeric ID, rather than a string
+     name."""
+     ear_punch: str
+     """Stores the ear tag location of the subject."""
+     sex: str
+     """Stores the sex of the subject."""
+     genotype: str
+     """Stores the genotype of the subject."""
+     date_of_birth_us: int
+     """Stores the date of birth of the subject as the number of microseconds elapsed since UTC epoch onset."""
+     weight_g: float
+     """Stores the weight of the subject pre-surgery, in grams."""
+     cage: int
+     """Stores the number of the cage used to house the subject after surgery."""
+     location_housed: str
+     """Stores the location used to house the subject after the surgery."""
+     status: str
+     """Stores the current status of the subject (alive / deceased)."""
+
+
+ @dataclass()
+ class ProcedureData:
+     """Stores the general information about the surgical intervention."""
+
+     surgery_start_us: int
+     """Stores the date and time when the surgery started, as the number of microseconds elapsed since UTC epoch onset."""
+     surgery_end_us: int
+     """Stores the date and time when the surgery ended, as the number of microseconds elapsed since UTC epoch onset."""
+     surgeon: str
+     """Stores the name or ID of the surgeon. If the intervention was carried out by multiple surgeons, all
+     participating surgeon names and IDs are stored as part of the same string."""
+     protocol: str
+     """Stores the experiment protocol number (ID) used during the surgery."""
+     surgery_notes: str
+     """Stores the surgeon's notes taken during the surgery."""
+     post_op_notes: str
+     """Stores the surgeon's notes taken during the post-surgery recovery period."""
+     surgery_quality: int = 0
+     """Stores the quality of the surgical intervention as a numeric level. 0 indicates an unusable (bad) result, 1
+     indicates a usable result that is not good enough to be included in a publication, and 2 indicates a
+     publication-grade result."""
+
+
+ @dataclass
+ class ImplantData:
+     """Stores the information about a single implantation performed during the surgical intervention.
+
+     Multiple ImplantData instances are used at the same time if the surgery involved multiple implants.
+     """
+
+     implant: str
+     """The descriptive name of the implant."""
+     implant_target: str
+     """The name of the brain region or cranium section targeted by the implant."""
+     implant_code: int
+     """The manufacturer code or internal reference code for the implant. This code is used to identify the implant in
+     additional datasheets and lab ordering documents."""
+     implant_ap_coordinate_mm: float
+     """Stores the implant's antero-posterior stereotactic coordinate, in millimeters, relative to bregma."""
+     implant_ml_coordinate_mm: float
+     """Stores the implant's medial-lateral stereotactic coordinate, in millimeters, relative to bregma."""
+     implant_dv_coordinate_mm: float
+     """Stores the implant's dorsal-ventral stereotactic coordinate, in millimeters, relative to bregma."""
+
+
+ @dataclass
+ class InjectionData:
+     """Stores the information about a single injection performed during the surgical intervention.
+
+     Multiple InjectionData instances are used at the same time if the surgery involved multiple injections.
+     """
+
+     injection: str
+     """The descriptive name of the injection."""
+     injection_target: str
+     """The name of the brain region targeted by the injection."""
+     injection_volume_nl: float
+     """The volume of substance, in nanoliters, delivered during the injection."""
+     injection_code: int
+     """The manufacturer code or internal reference code for the injected substance. This code is used to identify the
+     substance in additional datasheets and lab ordering documents."""
+     injection_ap_coordinate_mm: float
+     """Stores the injection's antero-posterior stereotactic coordinate, in millimeters, relative to bregma."""
+     injection_ml_coordinate_mm: float
+     """Stores the injection's medial-lateral stereotactic coordinate, in millimeters, relative to bregma."""
+     injection_dv_coordinate_mm: float
+     """Stores the injection's dorsal-ventral stereotactic coordinate, in millimeters, relative to bregma."""
+
+
+ @dataclass
+ class DrugData:
+     """Stores the information about all drugs administered to the subject before, during, and immediately after the
+     surgical intervention.
+     """
+
+     lactated_ringers_solution_volume_ml: float
+     """Stores the volume of Lactated Ringer's Solution (LRS) administered during surgery, in ml."""
+     lactated_ringers_solution_code: int
+     """Stores the manufacturer code or internal reference code for Lactated Ringer's Solution (LRS). This code is used
+     to identify the LRS batch in additional datasheets and lab ordering documents."""
+     ketoprofen_volume_ml: float
+     """Stores the volume of ketoprofen diluted with saline administered during surgery, in ml."""
+     ketoprofen_code: int
+     """Stores the manufacturer code or internal reference code for ketoprofen. This code is used to identify the
+     ketoprofen batch in additional datasheets and lab ordering documents."""
+     buprenorphine_volume_ml: float
+     """Stores the volume of buprenorphine diluted with saline administered during surgery, in ml."""
+     buprenorphine_code: int
+     """Stores the manufacturer code or internal reference code for buprenorphine. This code is used to identify the
+     buprenorphine batch in additional datasheets and lab ordering documents."""
+     dexamethasone_volume_ml: float
+     """Stores the volume of dexamethasone diluted with saline administered during surgery, in ml."""
+     dexamethasone_code: int
+     """Stores the manufacturer code or internal reference code for dexamethasone. This code is used to identify the
+     dexamethasone batch in additional datasheets and lab ordering documents."""
+
+
+ @dataclass
+ class SurgeryData(YamlConfig):
+     """Stores the data about a single animal surgical intervention.
+
+     This class aggregates other dataclass instances that store specific data about the surgical procedure. Primarily,
+     it is used to save the data as a .yaml file to every session's 'raw_data' directory for each animal used in every
+     lab project. This way, the surgery data is always stored alongside the behavior and brain activity data collected
+     during the session.
+     """
+
+     subject: SubjectData
+     """Stores the ID information about the subject (mouse)."""
+     procedure: ProcedureData
+     """Stores general data about the surgical intervention."""
+     drugs: DrugData
+     """Stores the data about the substances subcutaneously injected into the subject before, during, and immediately
+     after the surgical intervention."""
+     implants: list[ImplantData]
+     """Stores the data for all cranial and transcranial implants introduced to the subject during the surgical
+     intervention."""
+     injections: list[InjectionData]
+     """Stores the data about all substances infused into the brain of the subject during the surgical intervention."""
sl_shared_assets/data_classes/surgery_data.pyi
@@ -0,0 +1,89 @@
+ from dataclasses import dataclass
+
+ from ataraxis_data_structures import YamlConfig
+
+ @dataclass()
+ class SubjectData:
+     """Stores the ID information of the surgical intervention's subject (animal)."""
+
+     id: int
+     ear_punch: str
+     sex: str
+     genotype: str
+     date_of_birth_us: int
+     weight_g: float
+     cage: int
+     location_housed: str
+     status: str
+
+ @dataclass()
+ class ProcedureData:
+     """Stores the general information about the surgical intervention."""
+
+     surgery_start_us: int
+     surgery_end_us: int
+     surgeon: str
+     protocol: str
+     surgery_notes: str
+     post_op_notes: str
+     surgery_quality: int = ...
+
+ @dataclass
+ class ImplantData:
+     """Stores the information about a single implantation performed during the surgical intervention.
+
+     Multiple ImplantData instances are used at the same time if the surgery involved multiple implants.
+     """
+
+     implant: str
+     implant_target: str
+     implant_code: int
+     implant_ap_coordinate_mm: float
+     implant_ml_coordinate_mm: float
+     implant_dv_coordinate_mm: float
+
+ @dataclass
+ class InjectionData:
+     """Stores the information about a single injection performed during the surgical intervention.
+
+     Multiple InjectionData instances are used at the same time if the surgery involved multiple injections.
+     """
+
+     injection: str
+     injection_target: str
+     injection_volume_nl: float
+     injection_code: int
+     injection_ap_coordinate_mm: float
+     injection_ml_coordinate_mm: float
+     injection_dv_coordinate_mm: float
+
+ @dataclass
+ class DrugData:
+     """Stores the information about all drugs administered to the subject before, during, and immediately after the
+     surgical intervention.
+     """
+
+     lactated_ringers_solution_volume_ml: float
+     lactated_ringers_solution_code: int
+     ketoprofen_volume_ml: float
+     ketoprofen_code: int
+     buprenorphine_volume_ml: float
+     buprenorphine_code: int
+     dexamethasone_volume_ml: float
+     dexamethasone_code: int
+
+ @dataclass
+ class SurgeryData(YamlConfig):
+     """Stores the data about a single animal surgical intervention.
+
+     This class aggregates other dataclass instances that store specific data about the surgical procedure. Primarily,
+     it is used to save the data as a .yaml file to every session's 'raw_data' directory for each animal used in every
+     lab project. This way, the surgery data is always stored alongside the behavior and brain activity data collected
+     during the session.
+     """
+
+     subject: SubjectData
+     procedure: ProcedureData
+     drugs: DrugData
+     implants: list[ImplantData]
+     injections: list[InjectionData]
sl_shared_assets/py.typed
File without changes
sl_shared_assets/server/__init__.py
@@ -0,0 +1,8 @@
+ """This package provides the classes and methods used by all Sun lab libraries to submit remote jobs to the BioHPC
+ and other compute servers. This package is also used across all Sun lab members' private code to interface with the
+ shared server."""
+
+ from .job import Job
+ from .server import Server, ServerCredentials, generate_server_credentials
+
+ __all__ = ["Server", "ServerCredentials", "generate_server_credentials", "Job"]
sl_shared_assets/server/__init__.pyi
@@ -0,0 +1,8 @@
+ from .job import Job as Job
+ from .server import (
+     Server as Server,
+     ServerCredentials as ServerCredentials,
+     generate_server_credentials as generate_server_credentials,
+ )
+
+ __all__ = ["Server", "ServerCredentials", "generate_server_credentials", "Job"]
sl_shared_assets/server/job.py
@@ -0,0 +1,140 @@
+ """This module provides the core Job class, used as the starting point for all SLURM-managed jobs executed on lab
+ compute server(s). Specifically, the Job class acts as a wrapper around the SLURM configuration and specific logic of
+ each job. During runtime, the Server class interacts with input job objects to manage their transfer and execution on
+ the remote servers."""
+
+ # noinspection PyProtectedMember
+ from pathlib import Path
+ import datetime
+
+ from simple_slurm import Slurm  # type: ignore
+
+
+ class Job:
+     """Aggregates the data of a single SLURM-managed job to be executed on the Sun lab BioHPC cluster.
+
+     This class provides the API for constructing any server-side job in the Sun lab. Internally, it wraps an instance
+     of the Slurm class to package the job data into the format expected by the SLURM job manager. All jobs managed by
+     this class should be submitted to the 'submit_job' method of an initialized Server class to be executed on the
+     server.
+
+     Notes:
+         The initialization method of the class contains the arguments for configuring the SLURM and Conda
+         environments used by the job. Do not submit additional SLURM or Conda commands via the 'add_command' method,
+         as this may produce unexpected behavior.
+
+         Each job can be conceptualized as a sequence of shell instructions to execute on the remote compute server.
+         For the lab, that means that the bulk of the command consists of calling various CLIs exposed by data
+         processing or analysis pipelines, installed in the Conda environment on the server. Other than that, the job
+         contains commands for activating the target conda environment and, in some cases, doing other preparatory or
+         cleanup work. The source code of a 'remote' job is typically identical to what a human operator would type in
+         a 'local' terminal to run the same job on their PC.
+
+         A key feature of server-side jobs is that they are executed on virtual machines managed by SLURM. Since the
+         server has far more compute and memory resources than most individual jobs need, each job typically requests
+         a subset of these resources. Upon being executed, SLURM creates an isolated environment with the requested
+         resources and runs the job in that environment.
+
+         Since all jobs are expected to use the CLIs from python packages (pre)installed on the BioHPC server, make
+         sure that the target environment is installed and configured before submitting jobs to the server. See the
+         notes in the ReadMe to learn more about configuring server-side conda environments.
+
+     Args:
+         job_name: The descriptive name of the SLURM job to be created. Primarily, this name is used in terminal
+             printouts to identify the job to human operators.
+         output_log: The absolute path to the .txt file on the processing server where to store the standard output
+             data of the job.
+         error_log: The absolute path to the .txt file on the processing server where to store the standard error
+             data of the job.
+         working_directory: The absolute path to the directory where temporary job files will be stored. During
+             runtime, classes from this library use that directory to store files such as the job's shell script. All
+             such files are automatically removed from the directory at the end of a non-error runtime.
+         conda_environment: The name of the conda environment to activate on the server before running the job logic.
+             The environment should contain the necessary Python packages and CLIs to support running the job's logic.
+         cpus_to_use: The number of CPUs to use for the job.
+         ram_gb: The amount of RAM to allocate for the job, in Gigabytes.
+         time_limit: The maximum time limit for the job, in minutes. If the job is still running at the end of this
+             time period, it will be forcibly terminated. It is highly advised to always set adequate maximum runtime
+             limits to prevent jobs from hogging the server in case of runtime or algorithm errors.
+
+     Attributes:
+         remote_script_path: Stores the path to the script file relative to the root of the remote server that runs
+             the command.
+         job_id: Stores the unique job identifier assigned by the SLURM manager to this job when it is accepted for
+             execution. This field is initialized to None and is overwritten by the Server class that submits the job.
+         job_name: Stores the descriptive name of the SLURM job.
+         _command: Stores the managed SLURM command object.
+     """
+
+     def __init__(
+         self,
+         job_name: str,
+         output_log: Path,
+         error_log: Path,
+         working_directory: Path,
+         conda_environment: str,
+         cpus_to_use: int = 10,
+         ram_gb: int = 10,
+         time_limit: int = 60,
+     ) -> None:
+         # Resolves the path to the remote (server-side) .sh script file. This is the path where the job script
+         # will be stored on the server, once it is transferred by the Server class instance.
+         self.remote_script_path = str(working_directory.joinpath(f"{job_name}.sh"))
+
+         # Defines additional arguments used by the Server class that executes the job.
+         self.job_id: str | None = None  # This is set by the Server that submits the job.
+         self.job_name: str = job_name  # Also stores the job name to support more informative terminal prints.
+
+         # Builds the slurm command object filled with configuration information.
+         self._command: Slurm = Slurm(
+             cpus_per_task=cpus_to_use,
+             job_name=job_name,
+             output=str(output_log),
+             error=str(error_log),
+             mem=f"{ram_gb}G",
+             time=datetime.timedelta(minutes=time_limit),
+         )
+
+         # Conda shell initialization commands.
+         self._command.add_cmd("eval $(conda shell.bash hook)")
+         self._command.add_cmd("conda init bash")
+
+         # Activates the target conda environment for the command.
+         self._command.add_cmd(f"source activate {conda_environment}")  # Need to use old syntax for our server.
+
+     def __repr__(self) -> str:
+         """Returns the string representation of the Job instance."""
+         return f"Job(name={self.job_name}, id={self.job_id})"
+
+     def add_command(self, command: str) -> None:
+         """Adds the input command string to the end of the managed SLURM job command list.
+
+         This method is a wrapper around simple_slurm's 'add_cmd' method. It is used to iteratively build the shell
+         command sequence of the job.
+
+         Args:
+             command: The command string to add to the command list, e.g.: 'python main.py --input 1'.
+         """
+
+         self._command.add_cmd(command)
+
+     @property
+     def command_script(self) -> str:
+         """Translates the managed job data into a shell-script-writable string and returns it to the caller.
+
+         This method is used by the Server class to translate the job into the format that can be submitted to and
+         executed on the remote compute server. Do not call this method manually unless you know what you are doing.
+         The returned string is safe to dump into a .sh (shell script) file and move to the BioHPC server for
+         execution.
+         """
+
+         # Appends the command to clean up (remove) the temporary script file after the processing runtime is over.
+         self._command.add_cmd(f"rm -f {self.remote_script_path}")
+
+         # Translates the command to string format.
+         script_content = str(self._command)
+
+         # Replaces escaped $ (\$) with $. This is essential, as without this correction, things like conda
+         # initialization would not work as expected.
+         fixed_script_content = script_content.replace("\\$", "$")
+
+         # Returns the script content to the caller as a string.
+         return fixed_script_content
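
To illustrate the intended lifecycle of a Job, a minimal sketch follows. The constructor arguments mirror the signature above; the CLI string and all paths are hypothetical, and per the class docstring, the finished job would be passed to the 'submit_job' method of an initialized Server instance (defined in server.py, not shown here):

    from pathlib import Path

    from sl_shared_assets.server import Job

    job = Job(
        job_name="suite2p_single_day",
        output_log=Path("/home/user/logs/suite2p_out.txt"),
        error_log=Path("/home/user/logs/suite2p_err.txt"),
        working_directory=Path("/home/user/jobs"),
        conda_environment="suite2p_env",
        cpus_to_use=16,
        ram_gb=64,
        time_limit=240,
    )

    # The job body: a hypothetical CLI exposed by a pipeline installed in the server's conda environment.
    job.add_command("sl-process-session --session-path /server/data/my_project/1234/<session_name>")

    # Renders the SBATCH directives, conda activation, the commands above, and the final
    # self-cleanup 'rm -f' of the remote script file as a shell-script-ready string.
    script = job.command_script

    # server.submit_job(job)  # submission path, per the class docstring
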