sl-shared-assets 2.0.0__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sl-shared-assets might be problematic. Click here for more details.
- sl_shared_assets/__init__.py +17 -9
- sl_shared_assets/__init__.pyi +12 -8
- sl_shared_assets/cli.py +266 -20
- sl_shared_assets/cli.pyi +46 -5
- sl_shared_assets/data_classes/__init__.py +8 -3
- sl_shared_assets/data_classes/__init__.pyi +8 -4
- sl_shared_assets/data_classes/configuration_data.py +149 -30
- sl_shared_assets/data_classes/configuration_data.pyi +49 -11
- sl_shared_assets/data_classes/runtime_data.py +70 -49
- sl_shared_assets/data_classes/runtime_data.pyi +41 -33
- sl_shared_assets/data_classes/session_data.py +193 -253
- sl_shared_assets/data_classes/session_data.pyi +99 -116
- sl_shared_assets/data_classes/surgery_data.py +1 -1
- sl_shared_assets/server/__init__.py +2 -2
- sl_shared_assets/server/__init__.pyi +5 -2
- sl_shared_assets/server/job.py +229 -1
- sl_shared_assets/server/job.pyi +111 -0
- sl_shared_assets/server/server.py +431 -31
- sl_shared_assets/server/server.pyi +158 -15
- sl_shared_assets/tools/__init__.py +2 -1
- sl_shared_assets/tools/__init__.pyi +2 -0
- sl_shared_assets/tools/ascension_tools.py +9 -21
- sl_shared_assets/tools/ascension_tools.pyi +1 -1
- sl_shared_assets/tools/packaging_tools.py +2 -2
- sl_shared_assets/tools/project_management_tools.py +147 -41
- sl_shared_assets/tools/project_management_tools.pyi +45 -6
- {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/METADATA +127 -13
- sl_shared_assets-3.0.0.dist-info/RECORD +36 -0
- {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/entry_points.txt +2 -0
- sl_shared_assets-2.0.0.dist-info/RECORD +0 -36
- {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/WHEEL +0 -0
- {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -5,8 +5,8 @@ classes, which are also stored as .yaml files inside each session's raw_data and
|
|
|
5
5
|
these classes contain all necessary information to restore the data hierarchy on any machine. All other Sun lab
|
|
6
6
|
libraries use these classes to work with all lab-generated data."""
|
|
7
7
|
|
|
8
|
-
import re
|
|
9
8
|
import copy
|
|
9
|
+
from enum import StrEnum
|
|
10
10
|
import shutil as sh
|
|
11
11
|
from pathlib import Path
|
|
12
12
|
from dataclasses import field, dataclass
|
|
@@ -16,139 +16,43 @@ from ataraxis_base_utilities import LogLevel, console, ensure_directory_exists
|
|
|
16
16
|
from ataraxis_data_structures import YamlConfig
|
|
17
17
|
from ataraxis_time.time_helpers import get_timestamp
|
|
18
18
|
|
|
19
|
-
from .configuration_data import get_system_configuration_data
|
|
19
|
+
from .configuration_data import AcquisitionSystems, get_system_configuration_data
|
|
20
20
|
|
|
21
|
-
# Stores all supported input for SessionData class 'session_type' fields.
|
|
22
|
-
_valid_session_types = {"lick training", "run training", "mesoscope experiment", "window checking"}
|
|
23
21
|
|
|
22
|
+
class SessionTypes(StrEnum):
|
|
23
|
+
"""Defines the set of data acquisition session types supported by various data acquisition systems used in the
|
|
24
|
+
Sun lab.
|
|
24
25
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
python_version: str = ""
|
|
30
|
-
"""Stores the Python version used by the environment that acquired the data."""
|
|
31
|
-
sl_experiment_version: str = ""
|
|
32
|
-
"""Stores the version of the sl-experiment library that was used to acquire the data."""
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
@dataclass()
|
|
36
|
-
class ProjectConfiguration(YamlConfig):
|
|
37
|
-
"""Stores the project-specific configuration parameters that do not change between different animals and runtime
|
|
38
|
-
sessions.
|
|
39
|
-
|
|
40
|
-
An instance of this class is generated and saved as a .yaml file in the 'configuration' directory of each project
|
|
41
|
-
when it is created. After that, the stored data is reused for every runtime (training or experiment session) carried
|
|
42
|
-
out for each animal of the project. Additionally, a copy of the most actual configuration file is saved inside each
|
|
43
|
-
runtime session's 'raw_data' folder, providing seamless integration between the managed data and various Sun lab
|
|
44
|
-
(sl-) libraries.
|
|
26
|
+
A data acquisition session broadly encompasses a recording session carried out to either: acquire experiment data,
|
|
27
|
+
train the animal for the upcoming experiments, or to assess the quality of surgical or other pre-experiment
|
|
28
|
+
intervention.
|
|
45
29
|
|
|
46
30
|
Notes:
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
"""
|
|
51
|
-
|
|
52
|
-
project_name: str = ""
|
|
53
|
-
"""Stores the descriptive name of the project. This name is used to create the root directory for the project and
|
|
54
|
-
to initialize SessionData instances each time any Sun lab library interacts with the session's data."""
|
|
55
|
-
surgery_sheet_id: str = ""
|
|
56
|
-
"""The ID of the Google Sheet file that stores information about surgical interventions performed on all animals
|
|
57
|
-
participating in the managed project. This log sheet is used to parse and write the surgical intervention data for
|
|
58
|
-
each animal into every runtime session raw_data folder, so that the surgery data is always kept together with the
|
|
59
|
-
rest of the training and experiment data."""
|
|
60
|
-
water_log_sheet_id: str = ""
|
|
61
|
-
"""The ID of the Google Sheet file that stores information about water restriction (and behavior tracker)
|
|
62
|
-
information for all animals participating in the managed project. This is used to synchronize the information
|
|
63
|
-
inside the water restriction log with the state of the animal at the end of each training or experiment session.
|
|
31
|
+
This enumeration does not differentiate between different acquisition systems. Different acquisition systems
|
|
32
|
+
support different session types, and may not be suited for acquiring some of the session types listed in this
|
|
33
|
+
enumeration.
|
|
64
34
|
"""
|
|
65
35
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
The initialized ProjectConfiguration instance that stores the configuration data for the target project.
|
|
78
|
-
|
|
79
|
-
Raise:
|
|
80
|
-
FileNotFoundError: If the specified configuration file does not exist or is not a valid YAML file.
|
|
81
|
-
"""
|
|
82
|
-
|
|
83
|
-
# Prevents loading non-existent files.
|
|
84
|
-
if configuration_path.suffix != ".yaml" or not configuration_path.exists():
|
|
85
|
-
message = (
|
|
86
|
-
f"Unable to load the project configuration data from the specified path: {configuration_path}. Valid "
|
|
87
|
-
f"configuration file paths should use the '.yaml' extension and point to an existing file."
|
|
88
|
-
)
|
|
89
|
-
console.error(message=message, error=FileNotFoundError)
|
|
90
|
-
|
|
91
|
-
# Loads the data from the YAML file and initializes the class instance.
|
|
92
|
-
instance: ProjectConfiguration = cls.from_yaml(file_path=configuration_path) # type: ignore
|
|
93
|
-
|
|
94
|
-
# Verifies the loaded data. Most importantly, this step does not allow proceeding if the user did not
|
|
95
|
-
# replace the surgery log and water restriction log placeholders with valid ID values.
|
|
96
|
-
instance._verify_data()
|
|
97
|
-
|
|
98
|
-
# Returns the initialized class instance to caller
|
|
99
|
-
return instance
|
|
100
|
-
|
|
101
|
-
def save(self, path: Path) -> None:
|
|
102
|
-
"""Saves class instance data to disk as a project_configuration.yaml file.
|
|
103
|
-
|
|
104
|
-
This method is automatically called from the 'sl_experiment' library when a new project is created. After this
|
|
105
|
-
method's runtime, all future project initialization calls will use the load() method to reuse configuration data
|
|
106
|
-
saved to the .yaml file created by this method.
|
|
107
|
-
|
|
108
|
-
Args:
|
|
109
|
-
path: The path to the .yaml file to save the data to.
|
|
110
|
-
"""
|
|
111
|
-
|
|
112
|
-
# Saves the data to the YAML file
|
|
113
|
-
self.to_yaml(file_path=path)
|
|
114
|
-
|
|
115
|
-
def _verify_data(self) -> None:
|
|
116
|
-
"""Verifies the user-modified data loaded from the project_configuration.yaml file.
|
|
117
|
-
|
|
118
|
-
Since this class is explicitly designed to be modified by the user, this verification step is carried out to
|
|
119
|
-
ensure that the loaded data matches expectations. This reduces the potential for user errors to impact the
|
|
120
|
-
runtime behavior of the libraries using this class. This internal method is automatically called by the load()
|
|
121
|
-
method.
|
|
122
|
-
|
|
123
|
-
Raises:
|
|
124
|
-
ValueError: If the loaded data does not match expected formats or values.
|
|
125
|
-
"""
|
|
126
|
-
|
|
127
|
-
# Verifies Google Sheet ID formatting. Google Sheet IDs are usually 44 characters long, containing letters,
|
|
128
|
-
# numbers, hyphens, and underscores
|
|
129
|
-
pattern = r"^[a-zA-Z0-9_-]{44}$"
|
|
130
|
-
if not re.match(pattern, self.surgery_sheet_id):
|
|
131
|
-
message = (
|
|
132
|
-
f"Unable to verify the surgery_sheet_id field loaded from the 'project_configuration.yaml' file. "
|
|
133
|
-
f"Expected a string with 44 characters, using letters, numbers, hyphens, and underscores, but found: "
|
|
134
|
-
f"{self.surgery_sheet_id}."
|
|
135
|
-
)
|
|
136
|
-
console.error(message=message, error=ValueError)
|
|
137
|
-
if not re.match(pattern, self.water_log_sheet_id):
|
|
138
|
-
message = (
|
|
139
|
-
f"Unable to verify the surgery_sheet_id field loaded from the 'project_configuration.yaml' file. "
|
|
140
|
-
f"Expected a string with 44 characters, using letters, numbers, hyphens, and underscores, but found: "
|
|
141
|
-
f"{self.water_log_sheet_id}."
|
|
142
|
-
)
|
|
143
|
-
console.error(message=message, error=ValueError)
|
|
36
|
+
LICK_TRAINING = "lick training"
|
|
37
|
+
"""Mesoscope-VR session designed to teach animals to use the water delivery port while being head-fixed."""
|
|
38
|
+
RUN_TRAINING = "run training"
|
|
39
|
+
"""Mesoscope-VR session designed to teach animals how to run on the treadmill while being head-fixed."""
|
|
40
|
+
MESOSCOPE_EXPERIMENT = "mesoscope experiment"
|
|
41
|
+
"""Mesoscope-VR experiment session. The session uses Unity game engine to run experiments in virtual reality task
|
|
42
|
+
environments and collects brain activity data using Mesoscope."""
|
|
43
|
+
WINDOW_CHECKING = "window checking"
|
|
44
|
+
"""A special Mesoscope-VR session designed to evaluate the suitability of the given animal to be included into the
|
|
45
|
+
experiment dataset. Specifically, the session involves using the Mesoscope to check the quality of the cell
|
|
46
|
+
activity data."""
|
|
144
47
|
|
|
145
48
|
|
|
146
49
|
@dataclass()
|
|
147
50
|
class RawData:
|
|
148
51
|
"""Stores the paths to the directories and files that make up the 'raw_data' session-specific directory.
|
|
149
52
|
|
|
150
|
-
The raw_data directory stores the data acquired during the session runtime before and after
|
|
151
|
-
preprocessing does not alter the data, any data in that folder is considered
|
|
53
|
+
The raw_data directory stores the data acquired during the session data acquisition runtime, before and after
|
|
54
|
+
preprocessing. Since preprocessing does not irreversibly alter the data, any data in that folder is considered
|
|
55
|
+
'raw,' even if preprocessing losslessly re-compresses the data for efficient transfer.
|
|
152
56
|
|
|
153
57
|
Notes:
|
|
154
58
|
Sun lab data management strategy primarily relies on keeping multiple redundant copies of the raw_data for
|
|
@@ -165,38 +69,34 @@ class RawData:
|
|
|
165
69
|
includes .mp4 video files from each recorded camera."""
|
|
166
70
|
mesoscope_data_path: Path = Path()
|
|
167
71
|
"""Stores the path to the directory that contains all Mesoscope data acquired during the session. Primarily, this
|
|
168
|
-
includes the mesoscope-acquired .tiff files (brain activity data) and the
|
|
169
|
-
created for all sessions, but is only used (filled) by the sessions that use
|
|
170
|
-
brain activity data."""
|
|
72
|
+
includes the mesoscope-acquired .tiff files (brain activity data) and the MotionEstimator.me file (motion
|
|
73
|
+
estimation data). This directory is created for all sessions, but is only used (filled) by the sessions that use
|
|
74
|
+
the Mesoscope-VR system to acquire brain activity data."""
|
|
171
75
|
behavior_data_path: Path = Path()
|
|
172
76
|
"""Stores the path to the directory that contains all non-video behavior data acquired during the session.
|
|
173
77
|
Primarily, this includes the .npz log files that store serialized data acquired by all hardware components of the
|
|
174
|
-
data acquisition system other than cameras and brain activity data acquisition devices (such as the Mesoscope).
|
|
175
|
-
The reason why the directory is called 'behavior' is primarily because all .npz files are parsed to infer the
|
|
176
|
-
behavior of the animal, in contrast to brain (cell) activity data."""
|
|
78
|
+
data acquisition system other than cameras and brain activity data acquisition devices (such as the Mesoscope)."""
|
|
177
79
|
zaber_positions_path: Path = Path()
|
|
178
80
|
"""Stores the path to the zaber_positions.yaml file. This file contains the snapshot of all Zaber motor positions
|
|
179
|
-
at the end of the session. Zaber motors are used to position the LickPort
|
|
180
|
-
|
|
181
|
-
|
|
81
|
+
at the end of the session. Zaber motors are used to position the LickPort, HeadBar, and Wheel Mesoscope-VR modules
|
|
82
|
+
to support proper brain activity recording and behavior during the session. This file is only created for sessions
|
|
83
|
+
that use the Mesoscope-VR system."""
|
|
182
84
|
session_descriptor_path: Path = Path()
|
|
183
|
-
"""Stores the path to the session_descriptor.yaml file. This file is
|
|
184
|
-
and
|
|
185
|
-
|
|
85
|
+
"""Stores the path to the session_descriptor.yaml file. This file is filled jointly by the data acquisition system
|
|
86
|
+
and the experimenter. It contains session-specific information, such as the specific task parameters and the notes
|
|
87
|
+
made by the experimenter during runtime. Each supported session type uses a unique SessionDescriptor class to define
|
|
88
|
+
the format and content of the session_descriptor.yaml file."""
|
|
186
89
|
hardware_state_path: Path = Path()
|
|
187
90
|
"""Stores the path to the hardware_state.yaml file. This file contains the partial snapshot of the calibration
|
|
188
|
-
parameters used by the data acquisition
|
|
189
|
-
|
|
91
|
+
parameters used by the data acquisition system modules during the session. Primarily, it is used during data
|
|
92
|
+
processing to interpret the raw data stored inside .npz log files."""
|
|
190
93
|
surgery_metadata_path: Path = Path()
|
|
191
94
|
"""Stores the path to the surgery_metadata.yaml file. This file contains the most up-to-date information about the
|
|
192
95
|
surgical intervention(s) performed on the animal prior to the session."""
|
|
193
|
-
project_configuration_path: Path = Path()
|
|
194
|
-
"""Stores the path to the project_configuration.yaml file. This file contains the snapshot of the configuration
|
|
195
|
-
parameters for the session's project."""
|
|
196
96
|
session_data_path: Path = Path()
|
|
197
97
|
"""Stores the path to the session_data.yaml file. This path is used by the SessionData instance to save itself to
|
|
198
|
-
disk as a .yaml file.
|
|
199
|
-
|
|
98
|
+
disk as a .yaml file. In turn, the cached data is reused to reinstate the same data hierarchy across all supported
|
|
99
|
+
destinations, enabling various libraries to interface with the session data."""
|
|
200
100
|
experiment_configuration_path: Path = Path()
|
|
201
101
|
"""Stores the path to the experiment_configuration.yaml file. This file contains the snapshot of the
|
|
202
102
|
experiment runtime configuration used by the session. This file is only created for experiment sessions."""
|
|
@@ -206,13 +106,13 @@ class RawData:
|
|
|
206
106
|
the 'virtual' tip, tilt, and fastZ positions set via ScanImage software. This file is only created for sessions that
|
|
207
107
|
use the Mesoscope-VR system to acquire brain activity data."""
|
|
208
108
|
window_screenshot_path: Path = Path()
|
|
209
|
-
"""Stores the path to the .png screenshot of the ScanImagePC screen.
|
|
210
|
-
|
|
211
|
-
alignment and appearance for each experiment session. This file is only created for sessions
|
|
212
|
-
Mesoscope-VR system to acquire brain activity data."""
|
|
109
|
+
"""Stores the path to the .png screenshot of the ScanImagePC screen. As a minimum, the screenshot should contain the
|
|
110
|
+
image of the imaging plane and the red-dot alignment window. This is used to generate a visual snapshot of the
|
|
111
|
+
cranial window alignment and cell appearance for each experiment session. This file is only created for sessions
|
|
112
|
+
that use the Mesoscope-VR system to acquire brain activity data."""
|
|
213
113
|
system_configuration_path: Path = Path()
|
|
214
114
|
"""Stores the path to the system_configuration.yaml file. This file contains the exact snapshot of the data
|
|
215
|
-
acquisition
|
|
115
|
+
acquisition system configuration parameters used to acquire session data."""
|
|
216
116
|
checksum_path: Path = Path()
|
|
217
117
|
"""Stores the path to the ax_checksum.txt file. This file is generated as part of packaging the data for
|
|
218
118
|
transmission and stores the xxHash-128 checksum of the data. It is used to verify that the transmission did not
|
|
@@ -223,29 +123,30 @@ class RawData:
|
|
|
223
123
|
telomere.bin file are considered 'incomplete' and are excluded from all automated processing, as they may contain
|
|
224
124
|
corrupted, incomplete, or otherwise unusable data."""
|
|
225
125
|
ubiquitin_path: Path = Path()
|
|
226
|
-
"""Stores the path to the ubiquitin.bin file. This file is primarily used by the sl-experiment
|
|
126
|
+
"""Stores the path to the ubiquitin.bin file. This file is primarily used by the sl-experiment library to mark
|
|
227
127
|
local session data directories for deletion (purging). Typically, it is created once the data is safely moved to
|
|
228
128
|
the long-term storage destinations (NAS and Server) and the integrity of the moved data is verified on at least one
|
|
229
|
-
destination. During 'purge' sl-experiment runtimes, the library discovers and removes all session data marked
|
|
230
|
-
'ubiquitin.bin' files from the machine that runs the
|
|
129
|
+
destination. During 'sl-purge' sl-experiment runtimes, the library discovers and removes all session data marked
|
|
130
|
+
with 'ubiquitin.bin' files from the machine that runs the command."""
|
|
131
|
+
nk_path: Path = Path()
|
|
132
|
+
"""Stores the path to the nk.bin file. This file is used by the sl-experiment library to mark sessions undergoing
|
|
133
|
+
runtime initialization. Since runtime initialization is a complex process that may encounter a runtime error, the
|
|
134
|
+
marker is used to discover sessions that failed to initialize. Since uninitialized sessions by definition do not
|
|
135
|
+
contain any valuable data, they are marked for immediate deletion from all managed destinations."""
|
|
231
136
|
integrity_verification_tracker_path: Path = Path()
|
|
232
137
|
"""Stores the path to the integrity_verification.yaml tracker file. This file stores the current state of the data
|
|
233
138
|
integrity verification pipeline. It prevents more than one instance of the pipeline from working with the data
|
|
234
139
|
at a given time and communicates the outcome (success or failure) of the most recent pipeline runtime."""
|
|
235
|
-
version_data_path: Path = Path()
|
|
236
|
-
"""Stores the path to the version_data.yaml file. This file contains the snapshot of Python and sl-experiment
|
|
237
|
-
library versions that were used when the data was acquired."""
|
|
238
140
|
|
|
239
141
|
def resolve_paths(self, root_directory_path: Path) -> None:
|
|
240
142
|
"""Resolves all paths managed by the class instance based on the input root directory path.
|
|
241
143
|
|
|
242
|
-
This method is called each time the class is instantiated to regenerate the managed path
|
|
243
|
-
machine that instantiates the class.
|
|
144
|
+
This method is called each time the (wrapper) SessionData class is instantiated to regenerate the managed path
|
|
145
|
+
hierarchy on any machine that instantiates the class.
|
|
244
146
|
|
|
245
147
|
Args:
|
|
246
|
-
root_directory_path: The path to the top-level directory of the
|
|
247
|
-
|
|
248
|
-
the managed session.
|
|
148
|
+
root_directory_path: The path to the top-level directory of the session. Typically, this path is assembled
|
|
149
|
+
using the following hierarchy: root/project/animal/session_id
|
|
249
150
|
"""
|
|
250
151
|
|
|
251
152
|
# Generates the managed paths
|
|
@@ -257,7 +158,6 @@ class RawData:
|
|
|
257
158
|
self.session_descriptor_path = self.raw_data_path.joinpath("session_descriptor.yaml")
|
|
258
159
|
self.hardware_state_path = self.raw_data_path.joinpath("hardware_state.yaml")
|
|
259
160
|
self.surgery_metadata_path = self.raw_data_path.joinpath("surgery_metadata.yaml")
|
|
260
|
-
self.project_configuration_path = self.raw_data_path.joinpath("project_configuration.yaml")
|
|
261
161
|
self.session_data_path = self.raw_data_path.joinpath("session_data.yaml")
|
|
262
162
|
self.experiment_configuration_path = self.raw_data_path.joinpath("experiment_configuration.yaml")
|
|
263
163
|
self.mesoscope_positions_path = self.raw_data_path.joinpath("mesoscope_positions.yaml")
|
|
@@ -266,11 +166,15 @@ class RawData:
|
|
|
266
166
|
self.system_configuration_path = self.raw_data_path.joinpath("system_configuration.yaml")
|
|
267
167
|
self.telomere_path = self.raw_data_path.joinpath("telomere.bin")
|
|
268
168
|
self.ubiquitin_path = self.raw_data_path.joinpath("ubiquitin.bin")
|
|
169
|
+
self.nk_path = self.raw_data_path.joinpath("nk.bin")
|
|
269
170
|
self.integrity_verification_tracker_path = self.raw_data_path.joinpath("integrity_verification_tracker.yaml")
|
|
270
|
-
self.version_data_path = self.raw_data_path.joinpath("version_data.yaml")
|
|
271
171
|
|
|
272
172
|
def make_directories(self) -> None:
|
|
273
|
-
"""Ensures that all major subdirectories and the root directory exist, creating any missing directories.
|
|
173
|
+
"""Ensures that all major subdirectories and the root directory exist, creating any missing directories.
|
|
174
|
+
|
|
175
|
+
This method is called each time the (wrapper) SessionData class is instantiated and allowed to generate
|
|
176
|
+
missing data directories.
|
|
177
|
+
"""
|
|
274
178
|
ensure_directory_exists(self.raw_data_path)
|
|
275
179
|
ensure_directory_exists(self.camera_data_path)
|
|
276
180
|
ensure_directory_exists(self.mesoscope_data_path)
|
|
@@ -287,113 +191,113 @@ class ProcessedData:
|
|
|
287
191
|
"""
|
|
288
192
|
|
|
289
193
|
processed_data_path: Path = Path()
|
|
290
|
-
"""Stores the path to the root processed_data directory of the session. This directory stores the processed
|
|
291
|
-
|
|
194
|
+
"""Stores the path to the root processed_data directory of the session. This directory stores the processed session
|
|
195
|
+
data, generated from raw_data directory contents by various data processing pipelines."""
|
|
292
196
|
camera_data_path: Path = Path()
|
|
293
|
-
"""Stores the path to the directory that contains video tracking data generated by
|
|
294
|
-
processing
|
|
197
|
+
"""Stores the path to the directory that contains video tracking data generated by the Sun lab DeepLabCut-based
|
|
198
|
+
video processing pipeline(s)."""
|
|
295
199
|
mesoscope_data_path: Path = Path()
|
|
296
|
-
"""Stores path to the directory that contains processed brain activity (cell) data generated by
|
|
297
|
-
|
|
298
|
-
the Mesoscope-VR system.
|
|
200
|
+
"""Stores path to the directory that contains processed brain activity (cell) data generated by sl-suite2p
|
|
201
|
+
processing pipelines (single-day and multi-day). This directory is only used by sessions acquired with
|
|
202
|
+
the Mesoscope-VR system."""
|
|
299
203
|
behavior_data_path: Path = Path()
|
|
300
204
|
"""Stores the path to the directory that contains the non-video and non-brain-activity data extracted from
|
|
301
|
-
.npz log files by
|
|
302
|
-
job_logs_path: Path = Path()
|
|
303
|
-
"""Stores the path to the directory that stores the standard output and standard error data collected during
|
|
304
|
-
server-side data processing pipeline runtimes. This directory is primarily used when running data processing jobs
|
|
305
|
-
on the remote server. However, it is possible to configure local runtimes to also redirect log data to files
|
|
306
|
-
stored in this directory (by editing ataraxis-base-utilities 'console' variable)."""
|
|
205
|
+
.npz log files by the sl-behavior log processing pipeline."""
|
|
307
206
|
suite2p_processing_tracker_path: Path = Path()
|
|
308
|
-
"""Stores the path to the suite2p_processing_tracker.yaml tracker file. This file stores the current state of
|
|
309
|
-
sl-suite2p single-day
|
|
310
|
-
dataset_formation_tracker_path: Path = Path()
|
|
311
|
-
"""Same as suite2p_processing_tracker_path, but stores the current state of the dataset formation process that
|
|
312
|
-
includes this session (communicates whether the session has been successfully added to any dataset(s))."""
|
|
207
|
+
"""Stores the path to the suite2p_processing_tracker.yaml tracker file. This file stores the current state of
|
|
208
|
+
processing the session with the sl-suite2p single-day pipeline."""
|
|
313
209
|
behavior_processing_tracker_path: Path = Path()
|
|
314
|
-
"""Stores the path to the behavior_processing_tracker.yaml file. This file stores the current state of
|
|
315
|
-
behavior
|
|
210
|
+
"""Stores the path to the behavior_processing_tracker.yaml file. This file stores the current state of processing
|
|
211
|
+
the session with the sl-behavior log-parsing pipeline."""
|
|
316
212
|
video_processing_tracker_path: Path = Path()
|
|
317
|
-
"""Stores the path to the video_processing_tracker.yaml file. This file stores the current state of
|
|
318
|
-
|
|
213
|
+
"""Stores the path to the video_processing_tracker.yaml file. This file stores the current state of processing
|
|
214
|
+
the session with the DeepLabCut-based video processing pipeline."""
|
|
215
|
+
p53_path: Path = Path()
|
|
216
|
+
"""Stores the path to the p53.bin file. This file serves as a lock-in marker that determines whether the session is
|
|
217
|
+
in the processing or dataset state. Specifically, if the file does not exist, the session data cannot be integrated
|
|
218
|
+
into any dataset, as it may be actively worked on by processing pipelines. Conversely, if the marker exists,
|
|
219
|
+
processing pipelines are not allowed to work with the session, as it may be actively integrated into one or more
|
|
220
|
+
datasets."""
|
|
319
221
|
|
|
320
222
|
def resolve_paths(self, root_directory_path: Path) -> None:
|
|
321
223
|
"""Resolves all paths managed by the class instance based on the input root directory path.
|
|
322
224
|
|
|
323
|
-
This method is called each time the class is instantiated to regenerate the managed path
|
|
324
|
-
machine that instantiates the class.
|
|
225
|
+
This method is called each time the (wrapper) SessionData class is instantiated to regenerate the managed path
|
|
226
|
+
hierarchy on any machine that instantiates the class.
|
|
325
227
|
|
|
326
228
|
Args:
|
|
327
|
-
root_directory_path: The path to the top-level directory of the
|
|
328
|
-
|
|
329
|
-
the managed session.
|
|
229
|
+
root_directory_path: The path to the top-level directory of the session. Typically, this path is assembled
|
|
230
|
+
using the following hierarchy: root/project/animal/session_id
|
|
330
231
|
"""
|
|
331
232
|
# Generates the managed paths
|
|
332
233
|
self.processed_data_path = root_directory_path
|
|
333
234
|
self.camera_data_path = self.processed_data_path.joinpath("camera_data")
|
|
334
235
|
self.mesoscope_data_path = self.processed_data_path.joinpath("mesoscope_data")
|
|
335
236
|
self.behavior_data_path = self.processed_data_path.joinpath("behavior_data")
|
|
336
|
-
self.job_logs_path = self.processed_data_path.joinpath("job_logs")
|
|
337
237
|
self.suite2p_processing_tracker_path = self.processed_data_path.joinpath("suite2p_processing_tracker.yaml")
|
|
338
|
-
self.dataset_formation_tracker_path = self.processed_data_path.joinpath("dataset_formation_tracker.yaml")
|
|
339
238
|
self.behavior_processing_tracker_path = self.processed_data_path.joinpath("behavior_processing_tracker.yaml")
|
|
340
239
|
self.video_processing_tracker_path = self.processed_data_path.joinpath("video_processing_tracker.yaml")
|
|
240
|
+
self.p53_path = self.processed_data_path.joinpath("p53.bin")
|
|
341
241
|
|
|
342
242
|
def make_directories(self) -> None:
|
|
343
|
-
"""Ensures that all major subdirectories and the root directory exist, creating any missing directories.
|
|
243
|
+
"""Ensures that all major subdirectories and the root directory exist, creating any missing directories.
|
|
244
|
+
|
|
245
|
+
This method is called each time the (wrapper) SessionData class is instantiated and allowed to generate
|
|
246
|
+
missing data directories.
|
|
247
|
+
"""
|
|
344
248
|
|
|
345
249
|
ensure_directory_exists(self.processed_data_path)
|
|
346
250
|
ensure_directory_exists(self.camera_data_path)
|
|
347
251
|
ensure_directory_exists(self.behavior_data_path)
|
|
348
|
-
ensure_directory_exists(self.job_logs_path)
|
|
349
252
|
|
|
350
253
|
|
|
351
254
|
@dataclass
|
|
352
255
|
class SessionData(YamlConfig):
|
|
353
|
-
"""Stores and manages the data layout of a single
|
|
354
|
-
|
|
355
|
-
The primary purpose of this class is to maintain the session data structure across all supported destinations and
|
|
356
|
-
during all processing stages. It generates the paths used by all other classes from all Sun lab libraries that
|
|
357
|
-
interact with the session's data from the point of its creation and until the data is integrated into an
|
|
358
|
-
analysis dataset.
|
|
256
|
+
"""Stores and manages the data layout of a single Sun lab data acquisition session.
|
|
359
257
|
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
session
|
|
258
|
+
The primary purpose of this class is to maintain the session data structure across all supported destinations and to
|
|
259
|
+
provide a unified data access interface shared by all Sun lab libraries. The class can be used to either generate a
|
|
260
|
+
new session or load the layout of an already existing session. When the class is used to create a new session, it
|
|
261
|
+
generates the new session's name using the current UTC timestamp, accurate to microseconds. This ensures that each
|
|
262
|
+
session 'name' is unique and preserves the overall session order.
|
|
364
263
|
|
|
365
264
|
Notes:
|
|
366
265
|
This class is specifically designed for working with the data from a single session, performed by a single
|
|
367
266
|
animal under a specific experiment. The class is used to manage both raw and processed data. It follows the
|
|
368
|
-
data through acquisition, preprocessing and processing stages of the Sun lab data workflow.
|
|
369
|
-
|
|
370
|
-
data.
|
|
267
|
+
data through acquisition, preprocessing and processing stages of the Sun lab data workflow. This class serves as
|
|
268
|
+
an entry point for all interactions with the managed session's data.
|
|
371
269
|
"""
|
|
372
270
|
|
|
373
271
|
project_name: str
|
|
374
|
-
"""Stores the name of the
|
|
272
|
+
"""Stores the name of the project for which the session was acquired."""
|
|
375
273
|
animal_id: str
|
|
376
|
-
"""Stores the unique identifier of the animal that participates in the
|
|
274
|
+
"""Stores the unique identifier of the animal that participates in the session."""
|
|
377
275
|
session_name: str
|
|
378
|
-
"""Stores the name (timestamp-based ID) of the
|
|
379
|
-
session_type: str
|
|
380
|
-
"""Stores the type of the session.
|
|
381
|
-
|
|
382
|
-
'mesoscope experiment'.
|
|
276
|
+
"""Stores the name (timestamp-based ID) of the session."""
|
|
277
|
+
session_type: str | SessionTypes
|
|
278
|
+
"""Stores the type of the session. Has to be set to one of the supported session types, defined in the SessionTypes
|
|
279
|
+
enumeration exposed by the sl-shared-assets library.
|
|
383
280
|
"""
|
|
384
|
-
acquisition_system: str
|
|
385
|
-
"""Stores the name of the data acquisition
|
|
281
|
+
acquisition_system: str | AcquisitionSystems
|
|
282
|
+
"""Stores the name of the data acquisition system that acquired the data. Has to be set to one of the supported
|
|
283
|
+
acquisition systems, defined in the AcquisitionSystems enumeration exposed by the sl-shared-assets library."""
|
|
386
284
|
experiment_name: str | None
|
|
387
|
-
"""Stores the name of the experiment
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
285
|
+
"""Stores the name of the experiment performed during the session. If the session_type field indicates that the
|
|
286
|
+
session is an experiment, this field communicates the specific experiment configuration used by the session. During
|
|
287
|
+
runtime, this name is used to load the specific experiment configuration data stored in a .yaml file with the same
|
|
288
|
+
name. If the session is not an experiment session, this field should be left as Null (None)."""
|
|
289
|
+
python_version: str = "3.11.13"
|
|
290
|
+
"""Stores the Python version that was used to acquire session data."""
|
|
291
|
+
sl_experiment_version: str = "3.0.0"
|
|
292
|
+
"""Stores the version of the sl-experiment library that was used to acquire the session data."""
|
|
391
293
|
raw_data: RawData = field(default_factory=lambda: RawData())
|
|
392
|
-
"""Stores
|
|
393
|
-
|
|
294
|
+
"""Stores absolute paths to all directories and files that jointly make the session's raw data hierarchy. This
|
|
295
|
+
directory structure is resolved for each machine that creates or loads the SessionData class to ensure that all
|
|
296
|
+
Sun lab data can be accessed via the same API on any destination."""
|
|
394
297
|
processed_data: ProcessedData = field(default_factory=lambda: ProcessedData())
|
|
395
|
-
"""Stores
|
|
396
|
-
|
|
298
|
+
"""Stores absolute paths to all directories and files that jointly make the session's processed data hierarchy.
|
|
299
|
+
Typically, this hierarchy is only used on the lab's processing server(s), but it can also be used to run local
|
|
300
|
+
testing on end-user machines."""
|
|
397
301
|
|
|
398
302
|
def __post_init__(self) -> None:
|
|
399
303
|
"""Ensures raw_data and processed_data are always instances of RawData and ProcessedData."""
|
|
@@ -408,9 +312,11 @@ class SessionData(YamlConfig):
|
|
|
408
312
|
cls,
|
|
409
313
|
project_name: str,
|
|
410
314
|
animal_id: str,
|
|
411
|
-
session_type: str,
|
|
315
|
+
session_type: SessionTypes | str,
|
|
412
316
|
experiment_name: str | None = None,
|
|
413
317
|
session_name: str | None = None,
|
|
318
|
+
python_version: str = "3.11.13",
|
|
319
|
+
sl_experiment_version: str = "2.0.0",
|
|
414
320
|
) -> "SessionData":
|
|
415
321
|
"""Creates a new SessionData object and generates the new session's data structure on the local PC.
|
|
416
322
|
|
|
@@ -421,31 +327,37 @@ class SessionData(YamlConfig):
|
|
|
421
327
|
To load an already existing session data structure, use the load() method instead.
|
|
422
328
|
|
|
423
329
|
This method automatically dumps the data of the created SessionData instance into the session_data.yaml file
|
|
424
|
-
inside the root raw_data directory of the created hierarchy. It also finds and dumps other configuration
|
|
425
|
-
files, such as
|
|
426
|
-
|
|
427
|
-
|
|
330
|
+
inside the root 'raw_data' directory of the created hierarchy. It also finds and dumps other configuration
|
|
331
|
+
files, such as experiment_configuration.yaml and system_configuration.yaml into the same 'raw_data'
|
|
332
|
+
directory. If the session's runtime is interrupted unexpectedly, the acquired data can still be processed
|
|
333
|
+
using these pre-saved class instances.
|
|
428
334
|
|
|
429
335
|
Args:
|
|
430
|
-
project_name: The name of the project for which the
|
|
431
|
-
animal_id: The ID code of the animal
|
|
432
|
-
session_type: The type of the session.
|
|
433
|
-
|
|
434
|
-
experiment_name: The name of the experiment executed during
|
|
435
|
-
used for
|
|
436
|
-
|
|
336
|
+
project_name: The name of the project for which the session is carried out.
|
|
337
|
+
animal_id: The ID code of the animal participating in the session.
|
|
338
|
+
session_type: The type of the session. Has to be one of the supported session types exposed by the
|
|
339
|
+
SessionTypes enumeration.
|
|
340
|
+
experiment_name: The name of the experiment executed during the session. This optional argument is only
|
|
341
|
+
used for experiment sessions. Note! The name passed to this argument has to match the name of the
|
|
342
|
+
experiment configuration .yaml file.
|
|
343
|
+
session_name: An optional session name override. Generally, this argument should not be provided for most
|
|
437
344
|
sessions. When provided, the method uses this name instead of generating a new timestamp-based name.
|
|
438
345
|
This is only used during the 'ascension' runtime to convert old data structures to the modern
|
|
439
346
|
lab standards.
|
|
347
|
+
python_version: The string that specifies the Python version used to collect session data. Has to be
|
|
348
|
+
specified using the major.minor.patch version format.
|
|
349
|
+
sl_experiment_version: The string that specifies the version of the sl-experiment library used to collect
|
|
350
|
+
session data. Has to be specified using the major.minor.patch version format.
|
|
440
351
|
|
|
441
352
|
Returns:
|
|
442
353
|
An initialized SessionData instance that stores the layout of the newly created session's data.
|
|
443
354
|
"""
|
|
444
355
|
|
|
445
|
-
|
|
356
|
+
# Converts to a tuple, as 'in' checks on an Enum class raise TypeError for plain strings before Python 3.12
|
|
357
|
+
if session_type not in tuple(SessionTypes):
|
|
446
358
|
message = (
|
|
447
|
-
f"Invalid session type '{session_type
|
|
448
|
-
f"Use one of the supported session types
|
|
359
|
+
f"Invalid session type '{session_type}' encountered when creating a new SessionData instance. "
|
|
360
|
+
f"Use one of the supported session types from the SessionTypes enumeration."
|
|
449
361
|
)
|
|
450
362
|
console.error(message=message, error=ValueError)
|
|
451
363
|
|
|
@@ -504,25 +416,21 @@ class SessionData(YamlConfig):
|
|
|
504
416
|
project_name=project_name,
|
|
505
417
|
animal_id=animal_id,
|
|
506
418
|
session_name=session_name,
|
|
507
|
-
session_type=session_type
|
|
419
|
+
session_type=session_type,
|
|
508
420
|
acquisition_system=acquisition_system.name,
|
|
509
421
|
raw_data=raw_data,
|
|
510
422
|
processed_data=processed_data,
|
|
511
423
|
experiment_name=experiment_name,
|
|
424
|
+
python_version=python_version,
|
|
425
|
+
sl_experiment_version=sl_experiment_version,
|
|
512
426
|
)
|
|
513
427
|
|
|
514
428
|
# Saves the configured instance data to the session's folder, so that it can be reused during processing or
|
|
515
429
|
# preprocessing.
|
|
516
430
|
instance._save()
|
|
517
431
|
|
|
518
|
-
# Also saves the
|
|
519
|
-
#
|
|
520
|
-
|
|
521
|
-
# Copies the project_configuration.yaml file to session's folder
|
|
522
|
-
project_configuration_path = acquisition_system.paths.root_directory.joinpath(
|
|
523
|
-
project_name, "configuration", "project_configuration.yaml"
|
|
524
|
-
)
|
|
525
|
-
sh.copy2(project_configuration_path, instance.raw_data.project_configuration_path)
|
|
432
|
+
# Also saves the SystemConfiguration and ExperimentConfiguration instances to the same folder using the paths
|
|
433
|
+
# resolved for the RawData instance above.
|
|
526
434
|
|
|
527
435
|
# Dumps the acquisition system's configuration data to session's folder
|
|
528
436
|
acquisition_system.save(path=instance.raw_data.system_configuration_path)
|
|
@@ -534,6 +442,11 @@ class SessionData(YamlConfig):
|
|
|
534
442
|
)
|
|
535
443
|
sh.copy2(experiment_configuration_path, instance.raw_data.experiment_configuration_path)
|
|
536
444
|
|
|
445
|
+
# All newly created sessions are marked with the 'nk.bin' file. If the marker is not removed during runtime,
|
|
446
|
+
# the session becomes a valid target for deletion (purging) runtimes operating from the main acquisition
|
|
447
|
+
# machine of any data acquisition system.
|
|
448
|
+
instance.raw_data.nk_path.touch()
|
|
449
|
+
|
|
537
450
|
# Returns the initialized SessionData instance to caller
|
|
538
451
|
return instance
|
|
539
452
|
|
|
@@ -547,9 +460,9 @@ class SessionData(YamlConfig):
|
|
|
547
460
|
"""Loads the SessionData instance from the target session's session_data.yaml file.
|
|
548
461
|
|
|
549
462
|
This method is used to load the data layout information of an already existing session. Primarily, this is used
|
|
550
|
-
when
|
|
551
|
-
|
|
552
|
-
|
|
463
|
+
when processing session data. Due to how SessionData is stored and used in the lab, this method always loads the
|
|
464
|
+
data layout from the session_data.yaml file stored inside the 'raw_data' session subfolder. Currently, all
|
|
465
|
+
interactions with Sun lab data require access to the 'raw_data' folder of each session.
|
|
553
466
|
|
|
554
467
|
Notes:
|
|
555
468
|
To create a new session, use the create() method instead.
|
|
@@ -613,10 +526,19 @@ class SessionData(YamlConfig):
|
|
|
613
526
|
# Returns the initialized SessionData instance to caller
|
|
614
527
|
return instance
|
|
615
528
|
|
|
529
|
+
def runtime_initialized(self) -> None:
|
|
530
|
+
"""Ensures that the 'nk.bin' marker file is removed from the session's raw_data folder.
|
|
531
|
+
|
|
532
|
+
The 'nk.bin' marker is generated as part of the SessionData initialization (creation) process to mark sessions
|
|
533
|
+
that did not fully initialize during runtime. This service method is designed to be called by the sl-experiment
|
|
534
|
+
library classes to remove the 'nk.bin' marker when it is safe to do so. It should not be called by end-users.
|
|
535
|
+
"""
|
|
536
|
+
self.raw_data.nk_path.unlink(missing_ok=True)
|
|
537
|
+
|
|
616
538
|
def _save(self) -> None:
|
|
617
539
|
"""Saves the instance data to the 'raw_data' directory of the managed session as a 'session_data.yaml' file.
|
|
618
540
|
|
|
619
|
-
This is used to save the data stored in the instance to disk, so that it can be reused during
|
|
541
|
+
This is used to save the data stored in the instance to disk, so that it can be reused during further stages of
|
|
620
542
|
data processing. The method is intended to only be used by the SessionData instance itself during its
|
|
621
543
|
create() method runtime.
|
|
622
544
|
"""
|
|
@@ -631,6 +553,10 @@ class SessionData(YamlConfig):
|
|
|
631
553
|
origin.raw_data = None # type: ignore
|
|
632
554
|
origin.processed_data = None # type: ignore
|
|
633
555
|
|
|
556
|
+
# Converts StringEnum instances to strings
|
|
557
|
+
origin.session_type = str(origin.session_type)
|
|
558
|
+
origin.acquisition_system = str(origin.acquisition_system)
|
|
559
|
+
|
|
634
560
|
# Saves instance data as a .YAML file
|
|
635
561
|
origin.to_yaml(file_path=self.raw_data.session_data_path)
|
|
636
562
|
|
|
@@ -667,6 +593,16 @@ class ProcessingTracker(YamlConfig):
|
|
|
667
593
|
else:
|
|
668
594
|
self._lock_path = ""
|
|
669
595
|
|
|
596
|
+
def __del__(self) -> None:
|
|
597
|
+
"""If the instance is garbage-collected without calling the stop() method, assumes this is due to a runtime
|
|
598
|
+
error.
|
|
599
|
+
|
|
600
|
+
It is essential to always resolve the runtime as either 'stopped' or 'erred' to avoid deadlocking the session
|
|
601
|
+
data.
|
|
602
|
+
"""
|
|
603
|
+
if self._is_running:
|
|
604
|
+
self.error()
|
|
605
|
+
|
|
670
606
|
def _load_state(self) -> None:
|
|
671
607
|
"""Reads the current processing state from the wrapped .YAML file."""
|
|
672
608
|
if self.file_path.exists():
|
|
@@ -777,7 +713,11 @@ class ProcessingTracker(YamlConfig):
|
|
|
777
713
|
raise Timeout(message) # Fallback to appease mypy, should not be reachable
|
|
778
714
|
|
|
779
715
|
def stop(self) -> None:
|
|
780
|
-
"""
|
|
716
|
+
"""Configures the tracker file to indicate that the tracked processing runtime has been completed successfully.
|
|
717
|
+
|
|
718
|
+
After this method returns, it is UNSAFE to do any further processing from the process that calls this method.
|
|
719
|
+
Any process that calls the 'start' method of this class is expected to also call this method or the 'error' method
|
|
720
|
+
at the end of the runtime.
|
|
781
721
|
|
|
782
722
|
Raises:
|
|
783
723
|
TimeoutError: If the file lock for the target .YAML file cannot be acquired within the timeout period.
|
|
@@ -819,7 +759,7 @@ class ProcessingTracker(YamlConfig):
|
|
|
819
759
|
@property
|
|
820
760
|
def is_complete(self) -> bool:
|
|
821
761
|
"""Returns True if the tracker wrapped by the instance indicates that the processing runtime has been completed
|
|
822
|
-
successfully and
|
|
762
|
+
successfully at least once and that there is no ongoing processing that uses the target session."""
|
|
823
763
|
try:
|
|
824
764
|
# Acquires the lock
|
|
825
765
|
lock = FileLock(self._lock_path)
|
|
@@ -840,8 +780,8 @@ class ProcessingTracker(YamlConfig):
|
|
|
840
780
|
|
|
841
781
|
@property
|
|
842
782
|
def encountered_error(self) -> bool:
|
|
843
|
-
"""Returns True if the tracker wrapped by the instance indicates that the processing runtime
|
|
844
|
-
encountering an error
|
|
783
|
+
"""Returns True if the tracker wrapped by the instance indicates that the processing runtime for the target
|
|
784
|
+
session has aborted due to encountering an error."""
|
|
845
785
|
try:
|
|
846
786
|
# Acquires the lock
|
|
847
787
|
lock = FileLock(self._lock_path)
|
|
@@ -863,7 +803,7 @@ class ProcessingTracker(YamlConfig):
|
|
|
863
803
|
@property
|
|
864
804
|
def is_running(self) -> bool:
|
|
865
805
|
"""Returns True if the tracker wrapped by the instance indicates that the processing runtime is currently
|
|
866
|
-
running
|
|
806
|
+
running for the target session."""
|
|
867
807
|
try:
|
|
868
808
|
# Acquires the lock
|
|
869
809
|
lock = FileLock(self._lock_path)
|