sl-shared-assets 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sl-shared-assets might be problematic. Click here for more details.
- sl_shared_assets/__init__.py +80 -0
- sl_shared_assets/__init__.pyi +73 -0
- sl_shared_assets/cli.py +384 -0
- sl_shared_assets/cli.pyi +94 -0
- sl_shared_assets/data_classes/__init__.py +66 -0
- sl_shared_assets/data_classes/__init__.pyi +61 -0
- sl_shared_assets/data_classes/configuration_data.py +479 -0
- sl_shared_assets/data_classes/configuration_data.pyi +199 -0
- sl_shared_assets/data_classes/runtime_data.py +251 -0
- sl_shared_assets/data_classes/runtime_data.pyi +145 -0
- sl_shared_assets/data_classes/session_data.py +625 -0
- sl_shared_assets/data_classes/session_data.pyi +252 -0
- sl_shared_assets/data_classes/surgery_data.py +152 -0
- sl_shared_assets/data_classes/surgery_data.pyi +89 -0
- sl_shared_assets/py.typed +0 -0
- sl_shared_assets/server/__init__.py +8 -0
- sl_shared_assets/server/__init__.pyi +8 -0
- sl_shared_assets/server/job.py +140 -0
- sl_shared_assets/server/job.pyi +94 -0
- sl_shared_assets/server/server.py +214 -0
- sl_shared_assets/server/server.pyi +95 -0
- sl_shared_assets/tools/__init__.py +15 -0
- sl_shared_assets/tools/__init__.pyi +15 -0
- sl_shared_assets/tools/ascension_tools.py +277 -0
- sl_shared_assets/tools/ascension_tools.pyi +68 -0
- sl_shared_assets/tools/packaging_tools.py +148 -0
- sl_shared_assets/tools/packaging_tools.pyi +56 -0
- sl_shared_assets/tools/project_management_tools.py +201 -0
- sl_shared_assets/tools/project_management_tools.pyi +54 -0
- sl_shared_assets/tools/transfer_tools.py +119 -0
- sl_shared_assets/tools/transfer_tools.pyi +53 -0
- sl_shared_assets-1.0.0.dist-info/METADATA +869 -0
- sl_shared_assets-1.0.0.dist-info/RECORD +36 -0
- sl_shared_assets-1.0.0.dist-info/WHEEL +4 -0
- sl_shared_assets-1.0.0.dist-info/entry_points.txt +8 -0
- sl_shared_assets-1.0.0.dist-info/licenses/LICENSE +674 -0
|
@@ -0,0 +1,625 @@
|
|
|
1
|
+
"""This module contains classes jointly responsible for maintaining the Sun lab project data hierarchy across all
|
|
2
|
+
machines used to acquire, process, and store the data. Every valid experiment or training session conducted in the
|
|
3
|
+
lab generates a specific directory structure. This structure is defined via the ProjectConfiguration and SessionData
|
|
4
|
+
classes, which are also stored as .yaml files inside each session's raw_data and processed_data directories. Jointly,
|
|
5
|
+
these classes contain all necessary information to restore the data hierarchy on any machine. All other Sun lab
|
|
6
|
+
libraries use these classes to work with all lab-generated data."""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
import copy
|
|
10
|
+
import shutil as sh
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from dataclasses import field, dataclass
|
|
13
|
+
|
|
14
|
+
from ataraxis_base_utilities import LogLevel, console, ensure_directory_exists
|
|
15
|
+
from ataraxis_data_structures import YamlConfig
|
|
16
|
+
from ataraxis_time.time_helpers import get_timestamp
|
|
17
|
+
|
|
18
|
+
from .configuration_data import get_system_configuration_data
|
|
19
|
+
|
|
20
|
+
# Stores all supported input for SessionData class 'session_type' fields. All values are lowercase strings and are
# kept in a set, so membership checks against this collection are case-sensitive unless the caller normalizes the
# checked value first.
_valid_session_types = {"lick training", "run training", "mesoscope experiment", "window checking"}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass()
class ProjectConfiguration(YamlConfig):
    """Stores the project-specific configuration parameters that do not change between different animals and runtime
    sessions.

    An instance of this class is generated and saved as a .yaml file in the 'configuration' directory of each project
    when it is created. After that, the stored data is reused for every runtime (training or experiment session) carried
    out for each animal of the project. Additionally, a copy of the most recent configuration file is saved inside each
    runtime session's 'raw_data' folder, providing seamless integration between the managed data and various Sun lab
    (sl-) libraries.

    Notes:
        Together with SessionData, this class forms the entry point for all interactions with the data acquired in the
        Sun lab. The fields of this class are used to flexibly configure the runtime behavior of major data acquisition
        (sl-experiment) and processing (sl-forgery) libraries, adapting them for any project in the lab.
    """

    project_name: str = ""
    """Stores the descriptive name of the project. This name is used to create the root directory for the project and
    to initialize SessionData instances each time any Sun lab library interacts with the session's data."""
    surgery_sheet_id: str = ""
    """The ID of the Google Sheet file that stores information about surgical interventions performed on all animals
    participating in the managed project. This log sheet is used to parse and write the surgical intervention data for
    each animal into every runtime session raw_data folder, so that the surgery data is always kept together with the
    rest of the training and experiment data."""
    water_log_sheet_id: str = ""
    """The ID of the Google Sheet file that stores the water restriction (and behavior tracker) information for all
    animals participating in the managed project. This is used to synchronize the information inside the water
    restriction log with the state of the animal at the end of each training or experiment session.
    """

    @classmethod
    def load(cls, configuration_path: Path) -> "ProjectConfiguration":
        """Loads the project configuration parameters from the specified project_configuration.yaml file.

        This method is called during each interaction with any runtime session's data, including the creation of a new
        session.

        Args:
            configuration_path: The path to the project_configuration.yaml file from which to load the data.

        Returns:
            The initialized ProjectConfiguration instance that stores the configuration data for the target project.

        Raises:
            FileNotFoundError: If the specified path does not use the '.yaml' extension or does not point to an
                existing file.
            ValueError: If the Google Sheet ID fields loaded from the file fail the verification carried out by the
                _verify_data() method.
        """

        # Prevents loading non-existent files and files that do not use the expected extension.
        if configuration_path.suffix != ".yaml" or not configuration_path.exists():
            message = (
                f"Unable to load the project configuration data from the specified path: {configuration_path}. Valid "
                f"configuration file paths should use the '.yaml' extension and point to an existing file."
            )
            console.error(message=message, error=FileNotFoundError)

        # Loads the data from the YAML file and initializes the class instance.
        instance: ProjectConfiguration = cls.from_yaml(file_path=configuration_path)  # type: ignore

        # Verifies the loaded data. Most importantly, this step does not allow proceeding if the user did not
        # replace the surgery log and water restriction log placeholders with valid ID values.
        instance._verify_data()

        # Returns the initialized class instance to caller
        return instance

    def save(self, path: Path) -> None:
        """Saves class instance data to disk as a project_configuration.yaml file.

        This method is automatically called from the 'sl_experiment' library when a new project is created. After this
        method's runtime, all future project initialization calls will use the load() method to reuse configuration data
        saved to the .yaml file created by this method.

        Args:
            path: The path to the .yaml file to save the data to.
        """

        # Saves the data to the YAML file
        self.to_yaml(file_path=path)

    def _verify_data(self) -> None:
        """Verifies the user-modified data loaded from the project_configuration.yaml file.

        Since this class is explicitly designed to be modified by the user, this verification step is carried out to
        ensure that the loaded data matches expectations. This reduces the potential for user errors to impact the
        runtime behavior of the libraries using this class. This internal method is automatically called by the load()
        method.

        Raises:
            ValueError: If the loaded data does not match expected formats or values.
        """

        # Verifies Google Sheet ID formatting. Google Sheet IDs are usually 44 characters long, containing letters,
        # numbers, hyphens, and underscores
        pattern = r"^[a-zA-Z0-9_-]{44}$"

        # Both sheet ID fields share the same format, so they are checked with the same logic. The name of the field
        # being checked is embedded into the error message, so that the user is always pointed to the field that
        # actually failed the verification.
        for field_name in ("surgery_sheet_id", "water_log_sheet_id"):
            sheet_id = getattr(self, field_name)

            # A user-edited YAML file can yield non-string values (for example, null or a number). These are rejected
            # through the same ValueError path instead of surfacing as a TypeError from the 're' module. fullmatch()
            # is used instead of match(), as '$' would otherwise tolerate a trailing newline after the ID.
            if isinstance(sheet_id, str) and re.fullmatch(pattern, sheet_id):
                continue

            message = (
                f"Unable to verify the {field_name} field loaded from the 'project_configuration.yaml' file. "
                f"Expected a string with 44 characters, using letters, numbers, hyphens, and underscores, but found: "
                f"{sheet_id}."
            )
            console.error(message=message, error=ValueError)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@dataclass()
class RawData:
    """Groups the paths to every directory and file found inside the session-specific 'raw_data' directory.

    The raw_data directory holds the data acquired during the session runtime, both before and after preprocessing.
    Preprocessing does not alter the data, so everything stored in that folder is considered 'raw'.

    Notes:
        Sun lab data management strategy primarily relies on keeping multiple redundant copies of the raw_data for
        each acquired session. Typically, one copy is stored on the lab's processing server and the other is stored on
        the NAS.

        All path fields default to an empty Path() and are only meaningful after resolve_paths() is called.
    """

    raw_data_path: Path = Path()
    """Stores the path to the root raw_data directory of the session. This directory stores all raw data during
    acquisition and preprocessing. Note, preprocessing does not alter raw data, so at any point in time all data inside
    the folder is considered 'raw'."""
    camera_data_path: Path = Path()
    """Stores the path to the directory that contains all camera data acquired during the session. Primarily, this
    includes .mp4 video files from each recorded camera."""
    mesoscope_data_path: Path = Path()
    """Stores the path to the directory that contains all Mesoscope data acquired during the session. Primarily, this
    includes the mesoscope-acquired .tiff files (brain activity data) and the motion estimation data. This directory is
    created for all sessions, but is only used (filled) by the sessions that use the Mesoscope-VR system to acquire
    brain activity data."""
    behavior_data_path: Path = Path()
    """Stores the path to the directory that contains all non-video behavior data acquired during the session.
    Primarily, this includes the .npz log files that store serialized data acquired by all hardware components of the
    data acquisition system other than cameras and brain activity data acquisition devices (such as the Mesoscope).
    The directory is called 'behavior' because all .npz files are parsed to infer the behavior of the animal, in
    contrast to brain (cell) activity data."""
    zaber_positions_path: Path = Path()
    """Stores the path to the zaber_positions.yaml file. This file contains the snapshot of all Zaber motor positions
    at the end of the session. Zaber motors are used to position the LickPort and the HeadBar manipulators, which is
    essential for supporting proper brain imaging and animal's running behavior during the session. This file is only
    created for sessions that use the Mesoscope-VR system."""
    session_descriptor_path: Path = Path()
    """Stores the path to the session_descriptor.yaml file. This file is partially filled by the system during runtime
    and partially by the experimenter after the runtime. It contains session-specific information, such as the specific
    task parameters and the notes made by the experimenter during runtime."""
    hardware_state_path: Path = Path()
    """Stores the path to the hardware_state.yaml file. This file contains the partial snapshot of the calibration
    parameters used by the data acquisition and runtime management system modules during the session. Primarily,
    this is used during data processing to read the .npz data log files generated during runtime."""
    surgery_metadata_path: Path = Path()
    """Stores the path to the surgery_metadata.yaml file. This file contains the most recent information about the
    surgical intervention(s) performed on the animal prior to the session."""
    project_configuration_path: Path = Path()
    """Stores the path to the project_configuration.yaml file. This file contains the snapshot of the configuration
    parameters for the session's project."""
    session_data_path: Path = Path()
    """Stores the path to the session_data.yaml file. This path is used by the SessionData instance to save itself to
    disk as a .yaml file. The file contains the paths to all raw and processed data directories used during data
    acquisition or processing runtime."""
    experiment_configuration_path: Path = Path()
    """Stores the path to the experiment_configuration.yaml file. This file contains the snapshot of the
    experiment runtime configuration used by the session. This file is only created for experiment sessions."""
    mesoscope_positions_path: Path = Path()
    """Stores the path to the mesoscope_positions.yaml file. This file contains the snapshot of the positions used
    by the Mesoscope at the end of the session. This includes both the physical position of the mesoscope objective and
    the 'virtual' tip, tilt, and fastZ positions set via ScanImage software. This file is only created for sessions that
    use the Mesoscope-VR system to acquire brain activity data."""
    window_screenshot_path: Path = Path()
    """Stores the path to the .png screenshot of the ScanImagePC screen. The screenshot should contain the image of the
    cranial window and the red-dot alignment windows. This is used to generate a visual snapshot of the cranial window
    alignment and appearance for each experiment session. This file is only created for sessions that use the
    Mesoscope-VR system to acquire brain activity data."""
    system_configuration_path: Path = Path()
    """Stores the path to the system_configuration.yaml file. This file contains the exact snapshot of the data
    acquisition and runtime management system configuration parameters used to acquire session data."""
    checksum_path: Path = Path()
    """Stores the path to the ax_checksum.txt file. This file is generated as part of packaging the data for
    transmission and stores the xxHash-128 checksum of the data. It is used to verify that the transmission did not
    damage or otherwise alter the data."""
    telomere_path: Path = Path()
    """Stores the path to the telomere.bin file. This file is statically generated at the end of the session's data
    acquisition based on experimenter feedback to mark sessions that ran in-full with no issues. Sessions without a
    telomere.bin file are considered 'incomplete' and are excluded from all automated processing, as they may contain
    corrupted, incomplete, or otherwise unusable data."""
    ubiquitin_path: Path = Path()
    """Stores the path to the ubiquitin.bin file. This file is primarily used by the sl-experiment libraries to mark
    local session data directories for deletion (purging). Typically, it is created once the data is safely moved to
    the long-term storage destinations (NAS and Server) and the integrity of the moved data is verified on at least one
    destination. During 'purge' sl-experiment runtimes, the library discovers and removes all session data marked with
    'ubiquitin.bin' files from the machine that runs the code."""
    verified_bin_path: Path = Path()
    """Stores the path to the verified.bin file. This marker file is created (or removed) by the sl-shared-assets
    'verify-session' CLI command to indicate whether the session data inside the folder marked by the file has been
    verified for integrity. Primarily, this is used when the data is moved to the long-term storage destination (BioHPC
    server) to ensure it is safe to remove any local copies of the data stored on the acquisition system machine(s)."""

    def resolve_paths(self, root_directory_path: Path) -> None:
        """Regenerates every path managed by the instance relative to the input root directory.

        The input path is stored as the raw_data root, and each other managed path is set to a fixed directory or file
        name joined onto that root. Nothing is created on disk by this method.

        Args:
            root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
                hierarchy, this has to point to a directory under the main /session, /animal, or /project directory of
                the managed session.
        """

        root = root_directory_path
        self.raw_data_path = root

        # Maps each managed attribute to the name of the directory or file it points to directly under the root.
        layout = {
            "camera_data_path": "camera_data",
            "mesoscope_data_path": "mesoscope_data",
            "behavior_data_path": "behavior_data",
            "zaber_positions_path": "zaber_positions.yaml",
            "session_descriptor_path": "session_descriptor.yaml",
            "hardware_state_path": "hardware_state.yaml",
            "surgery_metadata_path": "surgery_metadata.yaml",
            "project_configuration_path": "project_configuration.yaml",
            "session_data_path": "session_data.yaml",
            "experiment_configuration_path": "experiment_configuration.yaml",
            "mesoscope_positions_path": "mesoscope_positions.yaml",
            "window_screenshot_path": "window_screenshot.png",
            "checksum_path": "ax_checksum.txt",
            "system_configuration_path": "system_configuration.yaml",
            "telomere_path": "telomere.bin",
            "ubiquitin_path": "ubiquitin.bin",
            "verified_bin_path": "verified.bin",
        }
        for attribute_name, entry_name in layout.items():
            setattr(self, attribute_name, root.joinpath(entry_name))

    def make_directories(self) -> None:
        """Ensures that the root directory and all major subdirectories exist, creating any missing directories."""

        # The root is listed first, followed by the camera, mesoscope, and behavior data subdirectories.
        managed_directories = (
            self.raw_data_path,
            self.camera_data_path,
            self.mesoscope_data_path,
            self.behavior_data_path,
        )
        for directory in managed_directories:
            ensure_directory_exists(directory)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
@dataclass()
class ProcessedData:
    """Groups the paths to every directory and file found inside the session-specific 'processed_data' directory.

    The processed_data directory holds the data generated by various processing pipelines from the raw data (contents
    of the raw_data directory). Processed data represents an intermediate step between raw data and the dataset used in
    the data analysis, but is not itself designed to be analyzed.

    Notes:
        All path fields default to an empty Path() and are only meaningful after resolve_paths() is called.
    """

    processed_data_path: Path = Path()
    """Stores the path to the root processed_data directory of the session. This directory stores the processed data
    as it is generated by various data processing pipelines."""
    camera_data_path: Path = Path()
    """Stores the path to the directory that contains video tracking data generated by our DeepLabCut-based video
    processing pipelines."""
    mesoscope_data_path: Path = Path()
    """Stores the path to the directory that contains processed brain activity (cell) data generated by our
    suite2p-based photometry processing pipelines (single-day and multi-day). This directory is only used by sessions
    acquired with the Mesoscope-VR system. Note, the make_directories() method of this class does not create this
    directory."""
    behavior_data_path: Path = Path()
    """Stores the path to the directory that contains the non-video and non-brain-activity data extracted from
    .npz log files by our in-house log parsing pipeline."""
    job_logs_path: Path = Path()
    """Stores the path to the directory that stores the standard output and standard error data collected during
    server-side data processing pipeline runtimes. This directory is primarily used when running data processing jobs
    on the remote server. However, it is possible to configure local runtimes to also redirect log data to files
    stored in this directory (by editing ataraxis-base-utilities 'console' variable)."""
    single_day_suite2p_bin_path: Path = Path()
    """Stores the path to the single_day_suite2p.bin file. This file is created by our single-day suite2p data
    processing pipeline to mark sessions that have been successfully processed with the single-day sl-suite2p library
    pipeline. Note, the file is removed at the beginning of the suite2p pipeline, so its presence always indicates
    successful processing runtime completion."""
    multi_day_suite2p_bin_path: Path = Path()
    """Same as single_day_suite2p_bin_path, but tracks whether the session has been successfully processed with the
    multi-day suite2p pipeline."""
    behavior_bin_path: Path = Path()
    """Stores the path to the behavior.bin file. This file is created by our behavior data extraction pipeline
    to mark sessions that have been successfully processed with the sl-behavior library pipeline. Note, the
    file is removed at the beginning of the behavior data extraction pipeline, so its presence always indicates
    successful processing runtime completion."""
    dlc_bin_path: Path = Path()
    """Stores the path to the dlc.bin file. This file is created by our DeepLabCut-based pose tracking pipeline
    to mark sessions that have been successfully processed with the sl-dlc library pipeline. Note, the
    file is removed at the beginning of the DeepLabCut pipeline, so its presence always indicates successful processing
    runtime completion."""

    def resolve_paths(self, root_directory_path: Path) -> None:
        """Regenerates every path managed by the instance relative to the input root directory.

        The input path is stored as the processed_data root, and each other managed path is set to a fixed directory
        or file name joined onto that root. Nothing is created on disk by this method.

        Args:
            root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
                hierarchy, this has to point to a directory under the main /session, /animal, or /project directory of
                the managed session.
        """

        root = root_directory_path
        self.processed_data_path = root

        # Maps each managed attribute to the name of the directory or file it points to directly under the root.
        layout = {
            "camera_data_path": "camera_data",
            "mesoscope_data_path": "mesoscope_data",
            "behavior_data_path": "behavior_data",
            "job_logs_path": "job_logs",
            "single_day_suite2p_bin_path": "single_day_suite2p.bin",
            "multi_day_suite2p_bin_path": "multi_day_suite2p.bin",
            "behavior_bin_path": "behavior.bin",
            "dlc_bin_path": "dlc.bin",
        }
        for attribute_name, entry_name in layout.items():
            setattr(self, attribute_name, root.joinpath(entry_name))

    def make_directories(self) -> None:
        """Ensures that the root directory and its camera, behavior, and job log subdirectories exist, creating any
        missing directories."""

        # NOTE(review): mesoscope_data_path is not created here, unlike in RawData.make_directories(). Presumably, the
        # directory is created by the pipeline that fills it; confirm this omission is intentional.
        managed_directories = (
            self.processed_data_path,
            self.camera_data_path,
            self.behavior_data_path,
            self.job_logs_path,
        )
        for directory in managed_directories:
            ensure_directory_exists(directory)
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
@dataclass
|
|
344
|
+
class SessionData(YamlConfig):
|
|
345
|
+
"""Stores and manages the data layout of a single training or experiment session acquired in the Sun lab.
|
|
346
|
+
|
|
347
|
+
The primary purpose of this class is to maintain the session data structure across all supported destinations and
|
|
348
|
+
during all processing stages. It generates the paths used by all other classes from all Sun lab libraries that
|
|
349
|
+
interact with the session's data from the point of its creation and until the data is integrated into an
|
|
350
|
+
analysis dataset.
|
|
351
|
+
|
|
352
|
+
When necessary, the class can be used to either generate a new session or load the layout of an already existing
|
|
353
|
+
session. When the class is used to create a new session, it generates the new session's name using the current
|
|
354
|
+
UTC timestamp, accurate to microseconds. This ensures that each session name is unique and preserves the overall
|
|
355
|
+
session order.
|
|
356
|
+
|
|
357
|
+
Notes:
|
|
358
|
+
This class is specifically designed for working with the data from a single session, performed by a single
|
|
359
|
+
animal under the specific experiment. The class is used to manage both raw and processed data. It follows the
|
|
360
|
+
data through acquisition, preprocessing and processing stages of the Sun lab data workflow. Together with
|
|
361
|
+
ProjectConfiguration class, this class serves as an entry point for all interactions with the managed session's
|
|
362
|
+
data.
|
|
363
|
+
"""
|
|
364
|
+
|
|
365
|
+
project_name: str
|
|
366
|
+
"""Stores the name of the managed session's project."""
|
|
367
|
+
animal_id: str
|
|
368
|
+
"""Stores the unique identifier of the animal that participates in the managed session."""
|
|
369
|
+
session_name: str
|
|
370
|
+
"""Stores the name (timestamp-based ID) of the managed session."""
|
|
371
|
+
session_type: str
|
|
372
|
+
"""Stores the type of the session. Primarily, this determines how to read the session_descriptor.yaml file. Has
|
|
373
|
+
to be set to one of the supported types: 'lick training', 'run training', 'window checking' or
|
|
374
|
+
'mesoscope experiment'.
|
|
375
|
+
"""
|
|
376
|
+
acquisition_system: str
|
|
377
|
+
"""Stores the name of the data acquisition and runtime management system that acquired the data."""
|
|
378
|
+
experiment_name: str | None
|
|
379
|
+
"""Stores the name of the experiment configuration file. If the session_type field is set to 'mesoscope experiment' and
this
|
|
380
|
+
field is not None (null), it communicates the specific experiment configuration used by the session. During runtime,
|
|
381
|
+
the name stored here is used to load the specific experiment configuration data stored in a .yaml file with the
|
|
382
|
+
same name. If the session is not an experiment session, this field is ignored."""
|
|
383
|
+
raw_data: RawData = field(default_factory=lambda: RawData())
|
|
384
|
+
"""Stores the paths to all subfolders and files found under the /project/animal/session/raw_data directory of any
|
|
385
|
+
PC used to work with Sun lab data."""
|
|
386
|
+
processed_data: ProcessedData = field(default_factory=lambda: ProcessedData())
|
|
387
|
+
"""Stores the paths to all subfolders and files found under the /project/animal/session/processed_data directory of
|
|
388
|
+
any PC used to work with Sun lab data."""
|
|
389
|
+
|
|
390
|
+
def __post_init__(self) -> None:
|
|
391
|
+
"""Ensures raw_data and processed_data are always instances of RawData and ProcessedData."""
|
|
392
|
+
if not isinstance(self.raw_data, RawData):
|
|
393
|
+
self.raw_data = RawData()
|
|
394
|
+
|
|
395
|
+
if not isinstance(self.processed_data, ProcessedData):
|
|
396
|
+
self.processed_data = ProcessedData()
|
|
397
|
+
|
|
398
|
+
@classmethod
def create(
    cls,
    project_name: str,
    animal_id: str,
    session_type: str,
    experiment_name: str | None = None,
    session_name: str | None = None,
) -> "SessionData":
    """Creates a new SessionData object and generates the new session's data structure on the local PC.

    This method is intended to be called exclusively by the sl-experiment library to create new training or
    experiment sessions and generate the session data directory tree.

    Notes:
        To load an already existing session data structure, use the load() method instead.

        This method automatically dumps the data of the created SessionData instance into the session_data.yaml file
        inside the root raw_data directory of the created hierarchy. It also copies the project_configuration.yaml
        file, dumps the acquisition system configuration, and (if an experiment name is provided) copies the
        experiment configuration file into the same raw_data directory. This ensures that if the session's runtime
        is interrupted unexpectedly, the acquired data can still be processed.

        If a session directory with the requested name already exists, the method appends a '_N' postfix to the
        name. The postfixed name is used both for the created directory and for the session_name field of the
        returned instance, so that load() resolves the same directory that create() generated.

    Args:
        project_name: The name of the project for which the data is acquired.
        animal_id: The ID code of the animal for which the data is acquired.
        session_type: The type of the session. Primarily, this determines how to read the session_descriptor.yaml
            file. The value is matched case-insensitively against the supported session types
            (_valid_session_types) and is stored in lowercase.
        experiment_name: The name of the experiment executed during managed session. When provided, the method
            copies the '{experiment_name}.yaml' file from the project's configuration directory to the session's
            raw_data directory.
        session_name: An optional session_name override. Generally, this argument should not be provided for most
            sessions. When provided, the method uses this name instead of generating a new timestamp-based name.
            This is only used during the 'ascension' runtime to convert old data structures to the modern
            lab standards.

    Returns:
        An initialized SessionData instance that stores the layout of the newly created session's data.

    Raises:
        ValueError: If the requested session type is not one of the supported session types.
        FileNotFoundError: If the project directory does not exist on the local machine (PC).
    """

    if session_type.lower() not in _valid_session_types:
        message = (
            f"Invalid session type '{session_type.lower()}' encountered when creating a new SessionData instance. "
            f"Use one of the supported session types: {_valid_session_types}"
        )
        console.error(message=message, error=ValueError)

    # Acquires the UTC timestamp to use as the session name, unless a name override is provided
    if session_name is None:
        session_name = str(get_timestamp(time_separator="-"))

    # Resolves the acquisition system configuration. This queries the acquisition system configuration data used
    # by the machine (PC) that calls this method.
    acquisition_system = get_system_configuration_data()

    # Constructs the root session directory path
    session_path = acquisition_system.paths.root_directory.joinpath(project_name, animal_id, session_name)

    # Prevents creating new sessions for non-existent projects.
    if not acquisition_system.paths.root_directory.joinpath(project_name).exists():
        message = (
            f"Unable to create the session directory hierarchy for the session {session_name} of the animal "
            f"'{animal_id}' and project '{project_name}'. The project does not exist on the local machine (PC). "
            f"Use the 'sl-create-project' CLI command to create the project on the local machine before creating "
            f"new sessions."
        )
        console.error(message=message, error=FileNotFoundError)

    # Handles potential session name conflicts. The originally requested name is kept for the message below, while
    # session_name is updated to the de-conflicted name. This keeps the name stored in the SessionData instance in
    # sync with the directory that is actually created, which load() relies on to rebuild the session paths.
    requested_name = session_name
    counter = 0
    while session_path.exists():
        counter += 1
        session_name = f"{requested_name}_{counter}"
        session_path = acquisition_system.paths.root_directory.joinpath(project_name, animal_id, session_name)

    # If a conflict is detected and resolved, warns the user about the resolved conflict.
    if counter > 0:
        message = (
            f"Session name conflict occurred for animal '{animal_id}' of project '{project_name}' "
            f"when adding the new session with timestamp {requested_name}. The session with identical name "
            f"already exists. The newly created session directory uses a '_{counter}' postfix to distinguish "
            f"itself from the already existing session directory."
        )
        console.echo(message=message, level=LogLevel.ERROR)

    # Generates subclasses stored inside the main class instance based on the data resolved above.
    raw_data = RawData()
    raw_data.resolve_paths(root_directory_path=session_path.joinpath("raw_data"))
    raw_data.make_directories()  # Generates the local 'raw_data' directory tree

    # Resolves, but does not make processed_data directories. All runtimes that require access to 'processed_data'
    # are configured to generate those directories if necessary, so there is no need to make them here.
    processed_data = ProcessedData()
    processed_data.resolve_paths(root_directory_path=session_path.joinpath("processed_data"))

    # Packages the sections generated above into a SessionData instance
    instance = SessionData(
        project_name=project_name,
        animal_id=animal_id,
        session_name=session_name,
        session_type=session_type.lower(),
        acquisition_system=acquisition_system.name,
        raw_data=raw_data,
        processed_data=processed_data,
        experiment_name=experiment_name,
    )

    # Saves the configured instance data to the session's folder, so that it can be reused during processing or
    # preprocessing.
    instance._save()

    # Also saves the ProjectConfiguration, SystemConfiguration, and ExperimentConfiguration instances to the same
    # folder using the paths resolved for the RawData instance above.

    # Copies the project_configuration.yaml file to session's folder
    project_configuration_path = acquisition_system.paths.root_directory.joinpath(
        project_name, "configuration", "project_configuration.yaml"
    )
    sh.copy2(project_configuration_path, instance.raw_data.project_configuration_path)

    # Dumps the acquisition system's configuration data to session's folder
    acquisition_system.save(path=instance.raw_data.system_configuration_path)

    if experiment_name is not None:
        # Copies the experiment_configuration.yaml file to session's folder
        experiment_configuration_path = acquisition_system.paths.root_directory.joinpath(
            project_name, "configuration", f"{experiment_name}.yaml"
        )
        sh.copy2(experiment_configuration_path, instance.raw_data.experiment_configuration_path)

    # Returns the initialized SessionData instance to caller
    return instance
|
|
529
|
+
|
|
530
|
+
@classmethod
def load(
    cls,
    session_path: Path,
    processed_data_root: Path | None = None,
    make_processed_data_directory: bool = False,
) -> "SessionData":
    """Loads the SessionData instance from the target session's session_data.yaml file.

    This method is used to load the data layout information of an already existing session. Primarily, this is used
    when preprocessing or processing session data. Due to how SessionData is stored and used in the lab, this
    method always loads the data layout from the session_data.yaml file stored inside the raw_data session
    subfolder. Currently, all interactions with Sun lab data require access to the 'raw_data' folder.

    Notes:
        To create a new session, use the create() method instead.

        The raw_data and processed_data paths are not read from the .yaml file. They are re-resolved on every load
        from the location of the session directory and the project, animal, and session names stored in the file.

    Args:
        session_path: The path to the root directory of an existing session, e.g.: root/project/animal/session.
        processed_data_root: If processed data is kept on a drive different from the one that stores raw data,
            provide the path to the root project directory (directory that stores all Sun lab projects) on that
            drive. The method will automatically resolve the project/animal/session/processed_data hierarchy using
            this root path. If raw and processed data are kept on the same drive, keep this set to None.
        make_processed_data_directory: Determines whether this method should create processed_data directory if it
            does not exist.

    Returns:
        An initialized SessionData instance for the session whose data is stored at the provided path.

    Raises:
        FileNotFoundError: If the 'session_data.yaml' file is not found under the session_path/raw_data/ subfolder.

    """
    # To properly initialize the SessionData instance, the provided path should contain the raw_data directory
    # with session_data.yaml file.
    session_data_path = session_path.joinpath("raw_data", "session_data.yaml")
    if not session_data_path.exists():
        message = (
            f"Unable to load the SessionData class for the target session: {session_path.stem}. No "
            f"session_data.yaml file was found inside the raw_data folder of the session. This likely "
            f"indicates that the session runtime was interrupted before recording any data, or that the "
            f"session path does not point to a valid session."
        )
        console.error(message=message, error=FileNotFoundError)

    # Loads class data from .yaml file
    instance: SessionData = cls.from_yaml(file_path=session_data_path)  # type: ignore

    # The method assumes that the 'donor' .yaml file is always stored inside the raw_data directory of the session
    # to be processed. Since the directory itself might have moved (between or even within the same PC) relative to
    # where it was when the SessionData snapshot was generated, reconfigures the paths to all raw_data files using
    # the root from above. The session path is expected to follow the root/project/animal/session layout, so the
    # third parent of the session directory is the root directory that stores all projects.
    local_root = session_path.parents[2]

    # RAW DATA
    new_root = local_root.joinpath(instance.project_name, instance.animal_id, instance.session_name, "raw_data")
    instance.raw_data.resolve_paths(root_directory_path=new_root)

    # Unless a different root is provided for processed data, it uses the same root as raw_data. This has to be
    # the projects' root directory (local_root) rather than the session's raw_data directory, as the
    # project/animal/session/processed_data hierarchy is appended to the root below.
    if processed_data_root is None:
        processed_data_root = local_root

    # Regenerates the processed_data path depending on the root resolution above
    instance.processed_data.resolve_paths(
        root_directory_path=processed_data_root.joinpath(
            instance.project_name, instance.animal_id, instance.session_name, "processed_data"
        )
    )

    # Generates processed data directories if requested and necessary
    if make_processed_data_directory:
        instance.processed_data.make_directories()

    # Returns the initialized SessionData instance to caller
    return instance
|
|
605
|
+
|
|
606
|
+
def _save(self) -> None:
    """Writes the instance data to the 'session_data.yaml' file inside the session's 'raw_data' directory.

    The saved file allows the session's data to be reused during preprocessing or data processing. This method is
    intended to be called only by the SessionData instance itself, as part of its create() method runtime.
    """

    # Works on a deep copy, so that the instance used for further processing keeps its resolved path sections.
    snapshot = copy.deepcopy(self)

    # Clears both path sections of the copy before it is written to disk. These fields are not loaded from disk
    # when the instance is loaded, so nulling them has no negative consequences, whereas keeping Path objects in
    # the saved file prevents the SessionData instance from being loaded from disk.
    for section_name in ("raw_data", "processed_data"):
        setattr(snapshot, section_name, None)

    # Dumps the cleared copy as a .YAML file, using the path resolved by the original (unmodified) instance.
    snapshot.to_yaml(file_path=self.raw_data.session_data_path)
|