sl-shared-assets 2.0.0-py3-none-any.whl → 3.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sl_shared_assets/__init__.py +17 -9
- sl_shared_assets/__init__.pyi +12 -8
- sl_shared_assets/cli.py +266 -20
- sl_shared_assets/cli.pyi +46 -5
- sl_shared_assets/data_classes/__init__.py +8 -3
- sl_shared_assets/data_classes/__init__.pyi +8 -4
- sl_shared_assets/data_classes/configuration_data.py +149 -30
- sl_shared_assets/data_classes/configuration_data.pyi +49 -11
- sl_shared_assets/data_classes/runtime_data.py +70 -49
- sl_shared_assets/data_classes/runtime_data.pyi +41 -33
- sl_shared_assets/data_classes/session_data.py +193 -253
- sl_shared_assets/data_classes/session_data.pyi +99 -116
- sl_shared_assets/data_classes/surgery_data.py +1 -1
- sl_shared_assets/server/__init__.py +2 -2
- sl_shared_assets/server/__init__.pyi +5 -2
- sl_shared_assets/server/job.py +229 -1
- sl_shared_assets/server/job.pyi +111 -0
- sl_shared_assets/server/server.py +431 -31
- sl_shared_assets/server/server.pyi +158 -15
- sl_shared_assets/tools/__init__.py +2 -1
- sl_shared_assets/tools/__init__.pyi +2 -0
- sl_shared_assets/tools/ascension_tools.py +9 -21
- sl_shared_assets/tools/ascension_tools.pyi +1 -1
- sl_shared_assets/tools/packaging_tools.py +2 -2
- sl_shared_assets/tools/project_management_tools.py +147 -41
- sl_shared_assets/tools/project_management_tools.pyi +45 -6
- {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/METADATA +127 -13
- sl_shared_assets-3.0.0.dist-info/RECORD +36 -0
- {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/entry_points.txt +2 -0
- sl_shared_assets-2.0.0.dist-info/RECORD +0 -36
- {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/WHEEL +0 -0
- {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/licenses/LICENSE +0 -0
sl_shared_assets/data_classes/session_data.pyi
CHANGED

@@ -1,84 +1,41 @@
+from enum import StrEnum
 from pathlib import Path
 from dataclasses import field, dataclass

 from _typeshed import Incomplete
 from ataraxis_data_structures import YamlConfig

-from .configuration_data import
+from .configuration_data import (
+    AcquisitionSystems as AcquisitionSystems,
+    get_system_configuration_data as get_system_configuration_data,
+)

-
+class SessionTypes(StrEnum):
+    """Defines the set of data acquisition session types supported by various data acquisition systems used in the
+    Sun lab.

-
-
-
-
-    python_version: str = ...
-    sl_experiment_version: str = ...
-
-@dataclass()
-class ProjectConfiguration(YamlConfig):
-    """Stores the project-specific configuration parameters that do not change between different animals and runtime
-    sessions.
-
-    An instance of this class is generated and saved as a .yaml file in the 'configuration' directory of each project
-    when it is created. After that, the stored data is reused for every runtime (training or experiment session) carried
-    out for each animal of the project. Additionally, a copy of the most actual configuration file is saved inside each
-    runtime session's 'raw_data' folder, providing seamless integration between the managed data and various Sun lab
-    (sl-) libraries.
+    A data acquisition session broadly encompasses a recording session carried out to either: acquire experiment data,
+    train the animal for the upcoming experiments, or to assess the quality of surgical or other pre-experiment
+    intervention.

     Notes:
-
-
-
+        This enumeration does not differentiate between different acquisition systems. Different acquisition systems
+        support different session types, and may not be suited for acquiring some of the session types listed in this
+        enumeration.
     """

-
-
-
-
-    def load(cls, configuration_path: Path) -> ProjectConfiguration:
-        """Loads the project configuration parameters from the specified project_configuration.yaml file.
-
-        This method is called during each interaction with any runtime session's data, including the creation of a new
-        session.
-
-        Args:
-            configuration_path: The path to the project_configuration.yaml file from which to load the data.
-
-        Returns:
-            The initialized ProjectConfiguration instance that stores the configuration data for the target project.
-
-        Raise:
-            FileNotFoundError: If the specified configuration file does not exist or is not a valid YAML file.
-        """
-    def save(self, path: Path) -> None:
-        """Saves class instance data to disk as a project_configuration.yaml file.
-
-        This method is automatically called from the 'sl_experiment' library when a new project is created. After this
-        method's runtime, all future project initialization calls will use the load() method to reuse configuration data
-        saved to the .yaml file created by this method.
-
-        Args:
-            path: The path to the .yaml file to save the data to.
-        """
-    def _verify_data(self) -> None:
-        """Verifies the user-modified data loaded from the project_configuration.yaml file.
-
-        Since this class is explicitly designed to be modified by the user, this verification step is carried out to
-        ensure that the loaded data matches expectations. This reduces the potential for user errors to impact the
-        runtime behavior of the libraries using this class. This internal method is automatically called by the load()
-        method.
-
-        Raises:
-            ValueError: If the loaded data does not match expected formats or values.
-        """
+    LICK_TRAINING = "lick training"
+    RUN_TRAINING = "run training"
+    MESOSCOPE_EXPERIMENT = "mesoscope experiment"
+    WINDOW_CHECKING = "window checking"

 @dataclass()
 class RawData:
     """Stores the paths to the directories and files that make up the 'raw_data' session-specific directory.

-    The raw_data directory stores the data acquired during the session runtime before and after
-    preprocessing does not alter the data, any data in that folder is considered
+    The raw_data directory stores the data acquired during the session data acquisition runtime, before and after
+    preprocessing. Since preprocessing does not irreversibly alter the data, any data in that folder is considered
+    'raw,' event if preprocessing losslessly re-compresses the data for efficient transfer.

     Notes:
         Sun lab data management strategy primarily relies on keeping multiple redundant copies of the raw_data for
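
The new SessionTypes enumeration replaces the free-form session-type strings used in 2.0.0. Because it subclasses StrEnum, each member is itself a string, which is what allows the SessionData stubs below to accept str | SessionTypes unions interchangeably. A minimal sketch of that behavior (member values are taken from the diff above; the surrounding code is illustrative):

from enum import StrEnum

class SessionTypes(StrEnum):
    LICK_TRAINING = "lick training"
    RUN_TRAINING = "run training"
    MESOSCOPE_EXPERIMENT = "mesoscope experiment"
    WINDOW_CHECKING = "window checking"

# StrEnum members compare equal to (and format as) their string values,
# so YAML round-trips and plain-string call sites keep working:
assert SessionTypes.LICK_TRAINING == "lick training"
assert f"{SessionTypes.RUN_TRAINING}" == "run training"
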
@@ -94,7 +51,6 @@ class RawData:
     session_descriptor_path: Path = ...
     hardware_state_path: Path = ...
     surgery_metadata_path: Path = ...
-    project_configuration_path: Path = ...
     session_data_path: Path = ...
     experiment_configuration_path: Path = ...
     mesoscope_positions_path: Path = ...
@@ -103,21 +59,24 @@ class RawData:
     checksum_path: Path = ...
     telomere_path: Path = ...
     ubiquitin_path: Path = ...
+    nk_path: Path = ...
     integrity_verification_tracker_path: Path = ...
-    version_data_path: Path = ...
     def resolve_paths(self, root_directory_path: Path) -> None:
         """Resolves all paths managed by the class instance based on the input root directory path.

-        This method is called each time the class is instantiated to regenerate the managed path
-        machine that instantiates the class.
+        This method is called each time the (wrapper) SessionData class is instantiated to regenerate the managed path
+        hierarchy on any machine that instantiates the class.

         Args:
-            root_directory_path: The path to the top-level directory of the
-
-            the managed session.
+            root_directory_path: The path to the top-level directory of the session. Typically, this path is assembled
+                using the following hierarchy: root/project/animal/session_id
         """
     def make_directories(self) -> None:
-        """Ensures that all major subdirectories and the root directory exist, creating any missing directories."""
+        """Ensures that all major subdirectories and the root directory exist, creating any missing directories.
+
+        This method is called each time the (wrapper) SessionData class is instantiated and allowed to generate
+        missing data directories.
+        """

 @dataclass()
 class ProcessedData:
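
The corrected resolve_paths() docstring spells out the directory hierarchy that was garbled in the 2.0.0 stubs: session roots follow root/project/animal/session_id. A hypothetical illustration of such a path (all components are placeholders, not lab conventions):

from pathlib import Path

# root/project/animal/session_id, with placeholder components:
session_root = Path("/mnt/data").joinpath("my_project", "A123", "2024-11-05-14-03-27-123456")
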
@@ -132,53 +91,52 @@ class ProcessedData:
     camera_data_path: Path = ...
     mesoscope_data_path: Path = ...
     behavior_data_path: Path = ...
-    job_logs_path: Path = ...
     suite2p_processing_tracker_path: Path = ...
-    dataset_formation_tracker_path: Path = ...
     behavior_processing_tracker_path: Path = ...
     video_processing_tracker_path: Path = ...
+    p53_path: Path = ...
     def resolve_paths(self, root_directory_path: Path) -> None:
         """Resolves all paths managed by the class instance based on the input root directory path.

-        This method is called each time the class is instantiated to regenerate the managed path
-        machine that instantiates the class.
+        This method is called each time the (wrapper) SessionData class is instantiated to regenerate the managed path
+        hierarchy on any machine that instantiates the class.

         Args:
-            root_directory_path: The path to the top-level directory of the
-
-            the managed session.
+            root_directory_path: The path to the top-level directory of the session. Typically, this path is assembled
+                using the following hierarchy: root/project/animal/session_id
         """
     def make_directories(self) -> None:
-        """Ensures that all major subdirectories and the root directory exist, creating any missing directories."""
+        """Ensures that all major subdirectories and the root directory exist, creating any missing directories.
+
+        This method is called each time the (wrapper) SessionData class is instantiated and allowed to generate
+        missing data directories.
+        """

 @dataclass
 class SessionData(YamlConfig):
-    """Stores and manages the data layout of a single
-
-    The primary purpose of this class is to maintain the session data structure across all supported destinations and
-    during all processing stages. It generates the paths used by all other classes from all Sun lab libraries that
-    interact with the session's data from the point of its creation and until the data is integrated into an
-    analysis dataset.
+    """Stores and manages the data layout of a single Sun lab data acquisition session.

-
-
-
-    session
+    The primary purpose of this class is to maintain the session data structure across all supported destinations and to
+    provide a unified data access interface shared by all Sun lab libraries. The class can be used to either generate a
+    new session or load the layout of an already existing session. When the class is used to create a new session, it
+    generates the new session's name using the current UTC timestamp, accurate to microseconds. This ensures that each
+    session 'name' is unique and preserves the overall session order.

     Notes:
         This class is specifically designed for working with the data from a single session, performed by a single
         animal under the specific experiment. The class is used to manage both raw and processed data. It follows the
-        data through acquisition, preprocessing and processing stages of the Sun lab data workflow.
-
-        data.
+        data through acquisition, preprocessing and processing stages of the Sun lab data workflow. This class serves as
+        an entry point for all interactions with the managed session's data.
     """

     project_name: str
     animal_id: str
     session_name: str
-    session_type: str
-    acquisition_system: str
+    session_type: str | SessionTypes
+    acquisition_system: str | AcquisitionSystems
     experiment_name: str | None
+    python_version: str = ...
+    sl_experiment_version: str = ...
     raw_data: RawData = field(default_factory=Incomplete)
     processed_data: ProcessedData = field(default_factory=Incomplete)
     def __post_init__(self) -> None:
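
The rewritten SessionData docstring states that new session names are derived from the current UTC timestamp, accurate to microseconds, so that names are unique and sort chronologically. The exact format is not shown in this diff; a sketch of the general idea:

from datetime import datetime, timezone

# Microsecond-accurate UTC timestamp; the real library's separator/format may differ.
session_name = datetime.now(timezone.utc).strftime("%Y-%m-%d-%H-%M-%S-%f")
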
@@ -188,9 +146,11 @@ class SessionData(YamlConfig):
         cls,
         project_name: str,
         animal_id: str,
-        session_type: str,
+        session_type: SessionTypes | str,
         experiment_name: str | None = None,
         session_name: str | None = None,
+        python_version: str = "3.11.13",
+        sl_experiment_version: str = "2.0.0",
     ) -> SessionData:
         """Creates a new SessionData object and generates the new session's data structure on the local PC.

@@ -201,22 +161,27 @@ class SessionData(YamlConfig):
         To load an already existing session data structure, use the load() method instead.

         This method automatically dumps the data of the created SessionData instance into the session_data.yaml file
-        inside the root raw_data directory of the created hierarchy. It also finds and dumps other configuration
-        files, such as
-
-
+        inside the root 'raw_data' directory of the created hierarchy. It also finds and dumps other configuration
+        files, such as experiment_configuration.yaml and system_configuration.yaml into the same 'raw_data'
+        directory. If the session's runtime is interrupted unexpectedly, the acquired data can still be processed
+        using these pre-saved class instances.

         Args:
-            project_name: The name of the project for which the
-            animal_id: The ID code of the animal
-            session_type: The type of the session.
-
-            experiment_name: The name of the experiment executed during
-                used for
-
+            project_name: The name of the project for which the session is carried out.
+            animal_id: The ID code of the animal participating in the session.
+            session_type: The type of the session. Has to be one of the supported session types exposed by the
+                SessionTypes enumeration.
+            experiment_name: The name of the experiment executed during the session. This optional argument is only
+                used for experiment sessions. Note! The name passed to this argument has to match the name of the
+                experiment configuration .yaml file.
+            session_name: An optional session name override. Generally, this argument should not be provided for most
                 sessions. When provided, the method uses this name instead of generating a new timestamp-based name.
                 This is only used during the 'ascension' runtime to convert old data structures to the modern
                 lab standards.
+            python_version: The string that specifies the Python version used to collect session data. Has to be
+                specified using the major.minor.patch version format.
+            sl_experiment_version: The string that specifies the version of the sl-experiment library used to collect
+                session data. Has to be specified using the major.minor.patch version format.

         Returns:
             An initialized SessionData instance that stores the layout of the newly created session's data.
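
Put together, the updated create() signature and argument descriptions suggest usage along these lines (a sketch with hypothetical names; the re-export location is assumed from the package layout in the file list above):

from sl_shared_assets import SessionData, SessionTypes  # import location assumed

session = SessionData.create(
    project_name="my_project",               # hypothetical project name
    animal_id="A123",                        # hypothetical animal ID
    session_type=SessionTypes.RUN_TRAINING,  # must be a supported session type
    python_version="3.11.13",                # major.minor.patch
    sl_experiment_version="2.0.0",           # major.minor.patch
)
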
@@ -228,9 +193,9 @@ class SessionData(YamlConfig):
         """Loads the SessionData instance from the target session's session_data.yaml file.

         This method is used to load the data layout information of an already existing session. Primarily, this is used
-        when
-
-
+        when processing session data. Due to how SessionData is stored and used in the lab, this method always loads the
+        data layout from the session_data.yaml file stored inside the 'raw_data' session subfolder. Currently, all
+        interactions with Sun lab data require access to the 'raw_data' folder of each session.

         Notes:
             To create a new session, use the create() method instead.
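
Loading an existing session, per the clarified load() description, always goes through the session_data.yaml stored under raw_data. A sketch, with the argument name assumed from the FileNotFoundError description in the next hunk:

from pathlib import Path

# 'session_path' argument name assumed from the Raises section below; the path is hypothetical.
session = SessionData.load(session_path=Path("/mnt/data/my_project/A123/2024-11-05-14-03-27-123456"))
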
@@ -250,11 +215,18 @@ class SessionData(YamlConfig):
         Raises:
             FileNotFoundError: If the 'session_data.yaml' file is not found under the session_path/raw_data/ subfolder.

+        """
+    def runtime_initialized(self) -> None:
+        """Ensures that the 'nk.bin' marker file is removed from the session's raw_data folder.
+
+        The 'nk.bin' marker is generated as part of the SessionData initialization (creation) process to mark sessions
+        that did not fully initialize during runtime. This service method is designed to be called by the sl-experiment
+        library classes to remove the 'nk.bin' marker when it is safe to do so. It should not be called by end-users.
         """
     def _save(self) -> None:
         """Saves the instance data to the 'raw_data' directory of the managed session as a 'session_data.yaml' file.

-        This is used to save the data stored in the instance to disk, so that it can be reused during
+        This is used to save the data stored in the instance to disk, so that it can be reused during further stages of
         data processing. The method is intended to only be used by the SessionData instance itself during its
         create() method runtime.
         """
@@ -274,6 +246,13 @@ class ProcessingTracker(YamlConfig):
     _is_running: bool = ...
     _lock_path: str = field(init=False)
     def __post_init__(self) -> None: ...
+    def __del__(self) -> None:
+        """If the instance is garbage-collected without calling the stop() method, assumes this is due to a runtime
+        error.
+
+        It is essential to always resolve the runtime as either 'stopped' or 'erred' to avoid deadlocking the session
+        data.
+        """
     def _load_state(self) -> None:
         """Reads the current processing state from the wrapped .YAML file."""
     def _save_state(self) -> None:
@@ -300,7 +279,11 @@ class ProcessingTracker(YamlConfig):
             TimeoutError: If the file lock for the target .YAML file cannot be acquired within the timeout period.
         """
     def stop(self) -> None:
-        """
+        """Configures the tracker file to indicate that the tracked processing runtime has been completed successfully.
+
+        After this method returns, it is UNSAFE to do any further processing from the process that calls this method.
+        Any process that calls the 'start' method of this class is expected to also call this method or 'error' method
+        at the end of the runtime.

         Raises:
             TimeoutError: If the file lock for the target .YAML file cannot be acquired within the timeout period.
@@ -308,12 +291,12 @@ class ProcessingTracker(YamlConfig):
     @property
     def is_complete(self) -> bool:
         """Returns True if the tracker wrapped by the instance indicates that the processing runtime has been completed
-        successfully and
+        successfully at least once and that there is no ongoing processing that uses the target session."""
     @property
     def encountered_error(self) -> bool:
-        """Returns True if the tracker wrapped by the instance indicates that the processing runtime
-        encountering an error
+        """Returns True if the tracker wrapped by the instance indicates that the processing runtime for the target
+        session has aborted due to encountering an error."""
     @property
     def is_running(self) -> bool:
         """Returns True if the tracker wrapped by the instance indicates that the processing runtime is currently
-        running
+        running for the target session."""
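
The expanded stop() docstring and the new __del__ guard encode a strict contract: every start() must be resolved by stop() or error(), otherwise the tracker deadlocks the session data. A sketch of the intended calling pattern, assuming a constructor that takes the tracker's .yaml file path and an error() method matching the one referenced in the stop() docstring:

from pathlib import Path

tracker = ProcessingTracker(file_path=Path("suite2p_processing_tracker.yaml"))  # argument name assumed

tracker.start()  # marks the runtime as running (raises TimeoutError if the file lock is busy)
try:
    process_session()  # placeholder for the actual processing step
    tracker.stop()     # resolves the runtime as completed successfully
except Exception:
    tracker.error()    # resolves the runtime as erred instead of leaving it running
    raise
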
sl_shared_assets/data_classes/surgery_data.py
CHANGED

@@ -51,7 +51,7 @@ class ProcedureData:
     surgery_quality: int = 0
     """Stores the quality of the surgical intervention as a numeric level. 0 indicates unusable (bad) result, 1
     indicates usable result that is not good enough to be included in a publication, 2 indicates publication-grade
-    result."""
+    result, 3 indicates high-tier publication grade result."""


 @dataclass
sl_shared_assets/server/__init__.py
CHANGED

@@ -2,7 +2,7 @@
 and other compute servers. This package is also used across all Sun lab members private code to interface with the
 shared server."""

-from .job import Job
+from .job import Job, JupyterJob
 from .server import Server, ServerCredentials, generate_server_credentials

-__all__ = ["Server", "ServerCredentials", "generate_server_credentials", "Job"]
+__all__ = ["Server", "ServerCredentials", "generate_server_credentials", "Job", "JupyterJob"]
sl_shared_assets/server/__init__.pyi
CHANGED

@@ -1,8 +1,11 @@
-from .job import
+from .job import (
+    Job as Job,
+    JupyterJob as JupyterJob,
+)
 from .server import (
     Server as Server,
     ServerCredentials as ServerCredentials,
     generate_server_credentials as generate_server_credentials,
 )

-__all__ = ["Server", "ServerCredentials", "generate_server_credentials", "Job"]
+__all__ = ["Server", "ServerCredentials", "generate_server_credentials", "Job", "JupyterJob"]
sl_shared_assets/server/job.py
CHANGED
@@ -1,13 +1,51 @@
 """This module provides the core Job class, used as the starting point for all SLURM-managed job executed on lab compute
 server(s). Specifically, the Job class acts as a wrapper around the SLURM configuration and specific logic of each
 job. During runtime, Server class interacts with input job objects to manage their transfer and execution on the
-remote servers."""
+remote servers.
+
+Since version 3.0.0, this module also provides the specialized JupyterJob class used to launch remote Jupyter
+notebook servers.
+"""

 # noinspection PyProtectedMember
+import re
 from pathlib import Path
 import datetime
+from dataclasses import dataclass

+# noinspection PyProtectedMember
 from simple_slurm import Slurm  # type: ignore
+from ataraxis_base_utilities import LogLevel, console
+
+
+@dataclass
+class _JupyterConnectionInfo:
+    """Stores the data used to establish the connection with a Jupyter notebook server running under SLURM control on a
+    remote Sun lab server.
+
+    More specifically, this class is used to transfer the connection metadata collected on the remote server back to
+    the local machine that requested the server to be established.
+    """
+
+    compute_node: str
+    """The hostname of the compute node where Jupyter is running."""
+
+    port: int
+    """The port number on which Jupyter is listening for communication. Usually, this is the default port 8888 or 9999.
+    """
+
+    token: str
+    """The authentication token for the Jupyter server. This token is used to authenticate the user when establishing
+    communication with the Jupyter server."""
+
+    @property
+    def localhost_url(self) -> str:
+        """Returns the localhost URL for connecting to the server.
+
+        To use this URL, first set up an SSH tunnel to the server via the specific Jupyter communication port and the
+        remote server access credentials.
+        """
+        return f"http://localhost:{self.port}/?token={self.token}"


 class Job:
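
The localhost_url property simply recombines the parsed port and token into the standard Jupyter URL, for example:

info = _JupyterConnectionInfo(compute_node="node-01", port=9999, token="abc123")  # illustrative values
assert info.localhost_url == "http://localhost:9999/?token=abc123"
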
@@ -138,3 +176,193 @@ class Job:

         # Returns the script content to caller as a string
         return fixed_script_content
+
+
+class JupyterJob(Job):
+    """Specialized Job instance designed to launch a Jupyter notebook server on SLURM.
+
+    This class extends the base Job class to include Jupyter-specific configuration and commands for starting a
+    notebook server in a SLURM environment. Using this specialized job allows users to set up remote Jupyter servers
+    while still benefitting from SLURM's job management and fair airtime policies.
+
+    Notes:
+        Jupyter servers directly compete for resources with headless data processing jobs. Therefore, it is important
+        to minimize the resource footprint and the runtime of each Jupyter server, if possible.
+
+    Args:
+        job_name: The descriptive name of the Jupyter SLURM job to be created. Primarily, this name is used in terminal
+            printouts to identify the job to human operators.
+        output_log: The absolute path to the .txt file on the processing server, where to store the standard output
+            data of the job.
+        error_log: The absolute path to the .txt file on the processing server, where to store the standard error
+            data of the job.
+        working_directory: The absolute path to the directory where temporary job files will be stored. During runtime,
+            classes from this library use that directory to store files such as the job's shell script. All such files
+            are automatically removed from the directory at the end of a non-errors runtime.
+        conda_environment: The name of the conda environment to activate on the server before running the job logic. The
+            environment should contain the necessary Python packages and CLIs to support running the job's logic. For
+            Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
+        port: The connection port number for Jupyter server. Do not change the default value unless you know what you
+            are doing, as the server has most common communication ports closed for security reasons.
+        notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have access to
+            items stored in or under this directory. For most runtimes, this should be set to the user's root data or
+            working directory.
+        cpus_to_use: The number of CPUs to allocate to the Jupyter server. Keep this value as small as possible to avoid
+            interfering with headless data processing jobs.
+        ram_gb: The amount of RAM, in GB, to allocate to the Jupyter server. Keep this value as small as possible to
+            avoid interfering with headless data processing jobs.
+        time_limit: The maximum Jupyter server uptime, in minutes. Set this to the expected duration of your jupyter
+            session.
+        jupyter_args: Stores additional arguments to pass to jupyter notebook initialization command.
+
+    Attributes:
+        port: Stores the connection port of the managed Jupyter server.
+        notebook_dir: Stores the absolute path to the directory used as Jupyter's root, relative to the remote server
+            root.
+        connection_info: Stores the JupyterConnectionInfo instance after the Jupyter server is instantiated.
+        host: Stores the hostname of the remote server.
+        user: Stores the username used to connect with the remote server.
+        connection_info_file: The absolute path to the file that stores connection information, relative to the remote
+            server root.
+        _command: Stores the shell command for launching the Jupyter server.
+    """
+
+    def __init__(
+        self,
+        job_name: str,
+        output_log: Path,
+        error_log: Path,
+        working_directory: Path,
+        conda_environment: str,
+        notebook_directory: Path,
+        port: int = 9999,  # Defaults to using port 9999
+        cpus_to_use: int = 2,  # Defaults to 2 CPU cores
+        ram_gb: int = 32,  # Defaults to 32 GB of RAM
+        time_limit: int = 120,  # Defaults to 2 hours of runtime (120 minutes)
+        jupyter_args: str = "",
+    ) -> None:
+        # Initializes parent Job class
+        super().__init__(
+            job_name=job_name,
+            output_log=output_log,
+            error_log=error_log,
+            working_directory=working_directory,
+            conda_environment=conda_environment,
+            cpus_to_use=cpus_to_use,
+            ram_gb=ram_gb,
+            time_limit=time_limit,
+        )
+
+        # Saves important jupyter configuration parameters to class attributes
+        self.port = port
+        self.notebook_dir = notebook_directory
+
+        # Similar to job ID, these attributes initialize to None and are reconfigured as part of the job submission
+        # process.
+        self.connection_info: _JupyterConnectionInfo | None = None
+        self.host: str | None = None
+        self.user: str | None = None
+
+        # Resolves the server-side path to the jupyter server connection info file.
+        self.connection_info_file = working_directory.joinpath(f"{job_name}_connection.txt")
+
+        # Builds Jupyter launch command.
+        self._build_jupyter_command(jupyter_args)
+
+    def _build_jupyter_command(self, jupyter_args: str) -> None:
+        """Builds the command to launch Jupyter notebook server on the remote Sun lab server."""
+
+        # Gets the hostname of the compute node and caches it in the connection data file. Also caches the port name.
+        self.add_command('echo "COMPUTE_NODE: $(hostname)" > {}'.format(self.connection_info_file))
+        self.add_command('echo "PORT: {}" >> {}'.format(self.port, self.connection_info_file))
+
+        # Generates a random access token for security and caches it in the connection data file.
+        self.add_command("TOKEN=$(openssl rand -hex 24)")
+        self.add_command('echo "TOKEN: $TOKEN" >> {}'.format(self.connection_info_file))
+
+        # Builds Jupyter startup command.
+        jupyter_cmd = [
+            "jupyter lab",
+            "--no-browser",
+            f"--port={self.port}",
+            "--ip=0.0.0.0",  # Listen on all interfaces
+            "--ServerApp.allow_origin='*'",  # Allow connections from SSH tunnel
+            "--ServerApp.allow_remote_access=True",  # Enable remote access
+            "--ServerApp.disable_check_xsrf=True",  # Helps with proxy connections
+            f"--ServerApp.root_dir={self.notebook_dir}",  # Root directory (not notebook-dir)
+            "--IdentityProvider.token=$TOKEN",  # Token authentication
+        ]
+
+        # Adds any additional arguments.
+        if jupyter_args:
+            jupyter_cmd.append(jupyter_args)
+
+        # Adds resolved jupyter command to the list of job commands.
+        jupyter_cmd_str = " ".join(jupyter_cmd)
+        self.add_command(jupyter_cmd_str)
+
+    def parse_connection_info(self, info_file: Path) -> None:
+        """Parses the connection information file created by the Jupyter job on the server.
+
+        Use this method to parse the connection file fetched from the server to finalize setting up the Jupyter
+        server job.
+
+        Args:
+            info_file: The path to the .txt file generated by the remote server that stores the Jupyter connection
+                information to be parsed.
+        """
+
+        with open(info_file, "r") as f:
+            content = f.read()
+
+        # Extracts information using regex
+        compute_node_match = re.search(r"COMPUTE_NODE: (.+)", content)
+        port_match = re.search(r"PORT: (\d+)", content)
+        token_match = re.search(r"TOKEN: (.+)", content)
+
+        if not all([compute_node_match, port_match, token_match]):
+            message = f"Could not parse connection information file for the Jupyter server job with id {self.job_id}."
+            console.error(message, ValueError)
+
+        # Stores extracted data inside connection_info attribute as a JupyterConnectionInfo instance.
+        self.connection_info = _JupyterConnectionInfo(
+            compute_node=compute_node_match.group(1).strip(),  # type: ignore
+            port=int(port_match.group(1)),  # type: ignore
+            token=token_match.group(1).strip(),  # type: ignore
+        )
+
+    def print_connection_info(self) -> None:
+        """Constructs and displays the command to set up the SSH tunnel to the server and the link to the localhost
+        server view in the terminal.
+
+        The SSH command should be used via a separate terminal or subprocess call to establish the secure SSH tunnel to
+        the Jupyter server. Once the SSH tunnel is established, the printed localhost url can be used to view the
+        server from the local machine.
+        """
+
+        # If connection information is not available, there is nothing to print
+        if self.connection_info is None:
+            console.echo(
+                message=(
+                    f"No connection information is available for the job {self.job_name}, which indicates that the job "
+                    f"has not been submitted to the server. Submit the job for execution to the remote Sun lab server "
+                    f"to generate the connection information"
+                ),
+                level=LogLevel.WARNING,
+            )
+            return  # No connection information available, so does not proceed with printing.
+
+        # Prints generic connection details to terminal
+        console.echo(f"Jupyter is running on: {self.connection_info.compute_node}")
+        console.echo(f"Port: {self.connection_info.port}")
+        console.echo(f"Token: {self.connection_info.token}")
+
+        # Constructs and displays the SSH tunnel command and the localhost url for connecting to the server
+        tunnel_cmd = (
+            f"ssh -N -L {self.connection_info.port}:{self.connection_info.compute_node}:{self.connection_info.port} "
+            f"{self.user}@{self.host}"
+        )
+        localhost_url = f"http://localhost:{self.connection_info.port}/?token={self.connection_info.token}"
+        print(f"\nTo access locally, run this in a terminal:")
+        print(tunnel_cmd)
+        print(f"\nThen open: {localhost_url}")