sl-shared-assets 1.0.0rc13__py3-none-any.whl → 1.0.0rc15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic. Click here for more details.

Files changed (42) hide show
  1. sl_shared_assets/__init__.py +27 -9
  2. sl_shared_assets/__init__.pyi +71 -0
  3. sl_shared_assets/cli.py +13 -14
  4. sl_shared_assets/cli.pyi +28 -0
  5. sl_shared_assets/data_classes/__init__.py +63 -0
  6. sl_shared_assets/data_classes/__init__.pyi +61 -0
  7. sl_shared_assets/data_classes/configuration_data.py +64 -0
  8. sl_shared_assets/data_classes/configuration_data.pyi +37 -0
  9. sl_shared_assets/data_classes/runtime_data.py +233 -0
  10. sl_shared_assets/data_classes/runtime_data.pyi +145 -0
  11. sl_shared_assets/data_classes/session_data.py +1275 -0
  12. sl_shared_assets/data_classes/session_data.pyi +527 -0
  13. sl_shared_assets/data_classes/surgery_data.py +152 -0
  14. sl_shared_assets/data_classes/surgery_data.pyi +89 -0
  15. sl_shared_assets/server/__init__.py +8 -0
  16. sl_shared_assets/server/__init__.pyi +8 -0
  17. sl_shared_assets/server/job.py +140 -0
  18. sl_shared_assets/server/job.pyi +94 -0
  19. sl_shared_assets/server/server.py +213 -0
  20. sl_shared_assets/server/server.pyi +95 -0
  21. sl_shared_assets/suite2p/__init__.py +8 -0
  22. sl_shared_assets/suite2p/__init__.pyi +4 -0
  23. sl_shared_assets/suite2p/multi_day.py +193 -0
  24. sl_shared_assets/suite2p/multi_day.pyi +99 -0
  25. sl_shared_assets/{suite2p.py → suite2p/single_day.py} +55 -32
  26. sl_shared_assets/suite2p/single_day.pyi +192 -0
  27. sl_shared_assets/tools/__init__.py +8 -0
  28. sl_shared_assets/tools/__init__.pyi +5 -0
  29. sl_shared_assets/{ascension_tools.py → tools/ascension_tools.py} +3 -6
  30. sl_shared_assets/tools/ascension_tools.pyi +68 -0
  31. sl_shared_assets/tools/packaging_tools.pyi +52 -0
  32. sl_shared_assets/tools/transfer_tools.pyi +53 -0
  33. {sl_shared_assets-1.0.0rc13.dist-info → sl_shared_assets-1.0.0rc15.dist-info}/METADATA +1 -1
  34. sl_shared_assets-1.0.0rc15.dist-info/RECORD +40 -0
  35. sl_shared_assets/data_classes.py +0 -1656
  36. sl_shared_assets/server.py +0 -293
  37. sl_shared_assets-1.0.0rc13.dist-info/RECORD +0 -14
  38. /sl_shared_assets/{packaging_tools.py → tools/packaging_tools.py} +0 -0
  39. /sl_shared_assets/{transfer_tools.py → tools/transfer_tools.py} +0 -0
  40. {sl_shared_assets-1.0.0rc13.dist-info → sl_shared_assets-1.0.0rc15.dist-info}/WHEEL +0 -0
  41. {sl_shared_assets-1.0.0rc13.dist-info → sl_shared_assets-1.0.0rc15.dist-info}/entry_points.txt +0 -0
  42. {sl_shared_assets-1.0.0rc13.dist-info → sl_shared_assets-1.0.0rc15.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1275 @@
1
+ """This module contains classes jointly responsible for maintaining the Sun lab project data hierarchy across all
2
+ machines used to acquire, process, and store the data. Every valid experiment or training session conducted in the
3
+ lab generates a specific directory structure. This structure is defined via the ProjectConfiguration and SessionData
4
+ classes, which are also stored as .yaml files inside each session's raw_data and processed_data directories. Jointly,
5
+ these classes contain all necessary information to restore the data hierarchy on any machine. All other Sun lab
6
+ libraries use these classes to work with all lab-generated data."""
7
+
8
+ import re
9
+ import copy
10
+ import shutil as sh
11
+ from pathlib import Path
12
+ from dataclasses import field, dataclass
13
+
14
+ import appdirs
15
+ from ataraxis_base_utilities import LogLevel, console, ensure_directory_exists
16
+ from ataraxis_data_structures import YamlConfig
17
+ from ataraxis_time.time_helpers import get_timestamp
18
+
19
+ from .configuration_data import ExperimentConfiguration
20
+
21
+
22
def replace_root_path(path: Path) -> None:
    """Replaces the path to the local root directory used to store all Sun lab projects with the provided path.

    The first time ProjectConfiguration class is instantiated to create a new project on a new machine,
    it asks the user to provide the path to the local directory where to save all Sun lab projects. This path is then
    stored inside the default user data directory as a .yaml file to be reused for all future projects. To support
    replacing this path without searching for the user data directory, which is usually hidden, this function finds and
    updates the contents of the file that stores the local root path.

    Args:
        path: The path to the new local root directory.
    """
    # Locates the static .txt file that persists the local root directory path inside the default
    # (usually hidden) user data directory.
    storage_file = Path(appdirs.user_data_dir(appname="sun_lab_data", appauthor="sun_lab")).joinpath("root_path.txt")

    # The app directory may not exist yet if this function runs before any project was created; both
    # the app directory and the new root directory are created here if they are missing.
    ensure_directory_exists(storage_file)
    ensure_directory_exists(path)

    # Overwrites the stored root path with the newly provided one.
    storage_file.write_text(str(path))
47
+
48
+
49
@dataclass()
class ProjectConfiguration(YamlConfig):
    """Stores the project-specific configuration parameters that do not change between different animals and runtime
    sessions.

    An instance of this class is generated and saved as a .yaml file in the 'configuration' directory of each project
    when it is created. After that, the stored data is reused for every runtime (training or experiment session) carried
    out for each animal of the project. Additionally, a copy of the most actual configuration file is saved inside each
    runtime session's 'raw_data' folder, providing seamless integration between the managed data and various Sun lab
    (sl-) libraries.

    Notes:
        Together with SessionData, this class forms the entry point for all interactions with the data acquired in the
        Sun lab. The fields of this class are used to flexibly configure the runtime behavior of major data acquisition
        (sl-experiment) and processing (sl-forgery) libraries, adapting them for any project in the lab.

        Most lab projects only need to adjust the "surgery_sheet_id" and "water_log_sheet_id" fields of the class. Most
        fields in this class are used by the sl-experiment library to generate the SessionData class instance for each
        session and during experiment data acquisition and preprocessing. Data processing pipelines use specialized
        configuration files stored in other modules of this library.

        Although all path fields use str | Path datatype, they are always stored as Path objects. These fields are
        converted to strings only when the data is dumped as a .yaml file.
    """

    project_name: str = ""
    """Stores the descriptive name of the project. This name is used to create the root directory for the project and
    to discover and load project's data during runtime."""
    surgery_sheet_id: str = ""
    """The ID of the Google Sheet file that stores information about surgical interventions performed on all animals
    participating in the managed project. This log sheet is used to parse and write the surgical intervention data for
    each animal into every runtime session raw_data folder, so that the surgery data is always kept together with the
    rest of the training and experiment data."""
    water_log_sheet_id: str = ""
    """The ID of the Google Sheet file that stores information about water restriction (and behavior tracker)
    information for all animals participating in the managed project. This is used to synchronize the information
    inside the water restriction log with the state of the animal at the end of each training or experiment session.
    """
    google_credentials_path: str | Path = Path("/media/Data/Experiments/sl-surgery-log-0f651e492767.json")
    """
    The path to the locally stored .JSON file that contains the service account credentials used to read and write
    Google Sheet data. This is used to access and work with the surgery log and the water restriction log files.
    Usually, the same service account is used across all projects.
    """
    server_credentials_path: str | Path = Path("/media/Data/Experiments/server_credentials.yaml")
    """
    The path to the locally stored .YAML file that contains the credentials for accessing the BioHPC server machine.
    While the filesystem of the server machine should already be mounted to the local machine via SMB or equivalent
    protocol, this data is used to establish SSH connection to the server and start newly acquired data processing
    after it is transferred to the server. This allows data acquisition, preprocessing, and processing to be controlled
    by the same runtime and prevents unprocessed data from piling up on the server.
    """
    local_root_directory: str | Path = Path("/media/Data/Experiments")
    """The absolute path to the directory where all projects are stored on the local host-machine (VRPC). Note,
    this field is configured automatically each time the class is instantiated through any method, so overwriting it
    manually will not be respected."""
    local_server_directory: str | Path = Path("/home/cybermouse/server/storage/sun_data")
    """The absolute path to the directory where the raw data portion of all projects is stored on the BioHPC server.
    This directory should be locally accessible (mounted) using a network sharing protocol, such as SMB."""
    local_nas_directory: str | Path = Path("/home/cybermouse/nas/rawdata")
    """The absolute path to the directory where all projects are stored on the Synology NAS. This directory should be
    locally accessible (mounted) using a network sharing protocol, such as SMB."""
    local_mesoscope_directory: str | Path = Path("/home/cybermouse/scanimage/mesodata")
    """The absolute path to the root mesoscope (ScanImagePC) directory where all mesoscope-acquired data is aggregated
    during acquisition runtime. This directory should be locally accessible (mounted) using a network sharing
    protocol, such as SMB."""
    local_server_working_directory: str | Path = Path("/home/cybermouse/server/workdir/sun_data")
    """The absolute path to the directory where the processed data portion of all projects is stored on the BioHPC
    server. This directory should be locally accessible (mounted) using a network sharing protocol, such as SMB."""
    remote_storage_directory: str | Path = Path("/storage/sun_data")
    """The absolute path, relative to the BioHPC server root, to the directory where all projects are stored on the
    slow (SSD) volume of the server. This path is used when running remote (server-side) jobs and, therefore, has to
    be relative to the server root."""
    remote_working_directory: str | Path = Path("/workdir/sun_data")
    """The absolute path, relative to the BioHPC server root, to the directory where all projects are stored on the
    fast (NVME) volume of the server. This path is used when running remote (server-side) jobs and, therefore, has to
    be relative to the server root."""
    face_camera_index: int = 0
    """The index of the face camera in the list of all available Harvester-managed cameras."""
    left_camera_index: int = 0
    """The index of the left body camera in the list of all available OpenCV-managed cameras."""
    right_camera_index: int = 2
    """The index of the right body camera in the list of all available OpenCV-managed cameras."""
    harvesters_cti_path: str | Path = Path("/opt/mvIMPACT_Acquire/lib/x86_64/mvGenTLProducer.cti")
    """The path to the GeniCam CTI file used to connect to Harvesters-managed cameras."""
    actor_port: str = "/dev/ttyACM0"
    """The USB port used by the Actor Microcontroller."""
    sensor_port: str = "/dev/ttyACM1"
    """The USB port used by the Sensor Microcontroller."""
    encoder_port: str = "/dev/ttyACM2"
    """The USB port used by the Encoder Microcontroller."""
    headbar_port: str = "/dev/ttyUSB0"
    """The USB port used by the HeadBar Zaber motor controllers (devices)."""
    lickport_port: str = "/dev/ttyUSB1"
    """The USB port used by the LickPort Zaber motor controllers (devices)."""
    unity_ip: str = "127.0.0.1"
    """The IP address of the MQTT broker used to communicate with the Unity game engine. This is only used during
    experiment runtimes. Training runtimes ignore this parameter."""
    unity_port: int = 1883
    """The port number of the MQTT broker used to communicate with the Unity game engine. This is only used during
    experiment runtimes. Training runtimes ignore this parameter."""
    valve_calibration_data: dict[int | float, int | float] | tuple[tuple[int | float, int | float], ...] = (
        (15000, 1.8556),
        (30000, 3.4844),
        (45000, 7.1846),
        (60000, 10.0854),
    )
    """A tuple of tuples that maps water delivery solenoid valve open times, in microseconds, to the dispensed volume
    of water, in microliters. During training and experiment runtimes, this data is used by the ValveModule to translate
    the requested reward volumes into times the valve needs to be open to deliver the desired volume of water.
    """

    @classmethod
    def load(cls, project_name: str, configuration_path: None | Path = None) -> "ProjectConfiguration":
        """Loads the project configuration parameters from a project_configuration.yaml file.

        This method is called during each interaction with any runtime session's data, including the creation of a new
        session. When this method is called for a non-existent (new) project name, it generates the default
        configuration file and prompts the user to update the configuration before proceeding with the runtime. All
        future interactions with the sessions from this project reuse the existing configuration file.

        Notes:
            As part of its runtime, the method may prompt the user to provide the path to the local root directory.
            This directory stores all project subdirectories and acts as the top level of the Sun lab data hierarchy.
            The path to the directory is then saved inside user's default data directory, so that it can be reused for
            all future projects. Use sl-replace-root CLI to replace the saved root directory path.

            Since this class is used for all Sun lab data structure interactions, this method supports multiple ways of
            loading class data. If this method is called as part of the sl-experiment new session creation pipeline, use
            'project_name' argument. If this method is called as part of the sl-forgery data processing pipeline(s), use
            'configuration_path' argument.

        Args:
            project_name: The name of the project whose configuration file needs to be discovered and loaded or, if the
                project does not exist, created.
            configuration_path: Optional. The path to the project_configuration.yaml file from which to load the data.
                This way of resolving the configuration data source always takes precedence over the project_name when
                both are provided.

        Returns:
            The initialized ProjectConfiguration instance that stores the configuration data for the target project.
        """

        # If the configuration path is not provided, uses the 'default' resolution strategy that involves reading the
        # user's data directory
        if configuration_path is None:
            # Uses appdirs to locate the user data directory and resolve the path to the storage file. Note, the
            # appname and appauthor here must match the values used by replace_root_path(), as that function is the
            # documented way (sl-replace-root CLI) of updating the file read below.
            app_dir = Path(appdirs.user_data_dir(appname="sun_lab_data", appauthor="sun_lab"))
            path_file = app_dir.joinpath("root_path.txt")

            # If the .txt file that stores the local root path does not exist, prompts the user to provide the path to
            # the local root directory and creates the root_path.txt file
            if not path_file.exists():
                # Gets the path to the local root directory from the user via command line input
                message = (
                    "Unable to resolve the local root directory automatically. Provide the absolute path to the local "
                    "directory that stores all project-specific directories. This is required when resolving project "
                    "configuration based on project's name."
                )
                # noinspection PyTypeChecker
                console.echo(message=message, level=LogLevel.WARNING)
                root_path_str = input("Local root path: ")
                root_path = Path(root_path_str)

                # If necessary, generates the local root directory
                ensure_directory_exists(root_path)

                # Also ensures that the app directory exists, so that the path_file can be created below.
                ensure_directory_exists(path_file)

                # Saves the root path to the file
                with open(path_file, "w") as f:
                    f.write(str(root_path))

            # Once the location of the path storage file is resolved, reads the root path from the file
            with open(path_file, "r") as f:
                root_path = Path(f.read().strip())

            # Uses the root experiment directory path to generate the path to the target project's configuration file.
            configuration_path = root_path.joinpath(project_name, "configuration", "project_configuration.yaml")
            ensure_directory_exists(configuration_path)  # Ensures the directory tree for the config path exists.

        # If the configuration file does not exist (this is the first time this class is initialized for a given
        # project), generates a precursor (default) configuration file and prompts the user to update the configuration.
        if not configuration_path.exists():
            message = (
                f"Unable to load project configuration data from disk as no 'project_configuration.yaml' file "
                f"found at the provided project path. Generating a precursor (default) configuration file under "
                f"{project_name}/configuration directory. Edit the file to specify project configuration before "
                f"proceeding further to avoid runtime errors. Also, edit other configuration precursors saved to the "
                f"same directory to control other aspects of data acquisition and processing."
            )
            # noinspection PyTypeChecker
            console.echo(message=message, level=LogLevel.WARNING)

            # Generates the default project configuration instance and dumps it as a .yaml file. Note, as part of
            # this process, the class generates the correct 'local_root_path' based on the path provided by the
            # user.
            precursor = ProjectConfiguration(local_root_directory=Path(str(configuration_path.parents[2])))
            precursor.project_name = project_name
            precursor.save(path=configuration_path)

            # Waits for the user to manually configure the newly created file.
            input("Enter anything to continue: ")

        # Loads the data from the YAML file and initializes the class instance. This now uses either the automatically
        # resolved configuration path or the manually provided path
        instance: ProjectConfiguration = cls.from_yaml(file_path=configuration_path)  # type: ignore

        # Converts all paths loaded as strings to Path objects used inside the library
        instance.local_mesoscope_directory = Path(instance.local_mesoscope_directory)
        instance.local_nas_directory = Path(instance.local_nas_directory)
        instance.local_server_directory = Path(instance.local_server_directory)
        instance.local_server_working_directory = Path(instance.local_server_working_directory)
        instance.remote_storage_directory = Path(instance.remote_storage_directory)
        instance.remote_working_directory = Path(instance.remote_working_directory)
        instance.google_credentials_path = Path(instance.google_credentials_path)
        instance.server_credentials_path = Path(instance.server_credentials_path)
        instance.harvesters_cti_path = Path(instance.harvesters_cti_path)

        # Local root path is always re-computed from the resolved configuration file's location
        instance.local_root_directory = Path(str(configuration_path.parents[2]))

        # Converts valve_calibration data from dictionary to a tuple of tuples format
        if not isinstance(instance.valve_calibration_data, tuple):
            instance.valve_calibration_data = tuple((k, v) for k, v in instance.valve_calibration_data.items())

        # Partially verifies the loaded data. Most importantly, this step does not allow proceeding if the user did not
        # replace the surgery log and water restriction log placeholders with valid ID values.
        instance._verify_data()

        # Returns the initialized class instance to caller
        return instance

    def save(self, path: Path) -> None:
        """Saves class instance data to disk as a project_configuration.yaml file.

        This method is automatically called when a new project is created. After this method's runtime, all future
        calls to the load() method will reuse the configuration data saved to the .yaml file.

        Notes:
            When this method is used to generate the configuration .yaml file for a new project, it also generates the
            example 'default_experiment.yaml'. This file is designed to showcase how to write ExperimentConfiguration
            data files that are used to control Mesoscope-VR system states during experiment session runtimes.

        Args:
            path: The path to the .yaml file to save the data to.
        """

        # Copies instance data to prevent it from being modified by reference when executing the steps below
        original = copy.deepcopy(self)

        # Converts all Path objects to strings before dumping the data, as .yaml encoder does not properly recognize
        # Path objects
        original.local_root_directory = str(original.local_root_directory)
        original.local_mesoscope_directory = str(original.local_mesoscope_directory)
        original.local_nas_directory = str(original.local_nas_directory)
        original.local_server_directory = str(original.local_server_directory)
        original.local_server_working_directory = str(original.local_server_working_directory)
        original.remote_storage_directory = str(original.remote_storage_directory)
        original.remote_working_directory = str(original.remote_working_directory)
        original.google_credentials_path = str(original.google_credentials_path)
        original.server_credentials_path = str(original.server_credentials_path)
        original.harvesters_cti_path = str(original.harvesters_cti_path)

        # Converts valve calibration data into dictionary format
        if isinstance(original.valve_calibration_data, tuple):
            original.valve_calibration_data = {k: v for k, v in original.valve_calibration_data}

        # Saves the data to the YAML file
        original.to_yaml(file_path=path)

        # As part of this runtime, also generates and dumps the 'precursor' experiment configuration file.
        experiment_configuration_path = path.parent.joinpath("default_experiment.yaml")
        if not experiment_configuration_path.exists():
            example_experiment = ExperimentConfiguration()
            example_experiment.to_yaml(experiment_configuration_path)

    def _verify_data(self) -> None:
        """Verifies the user-modified data loaded from the project_configuration.yaml file.

        Since this class is explicitly designed to be modified by the user, this verification step is carried out to
        ensure that the loaded data matches expectations. This reduces the potential for user errors to impact the
        runtime behavior of the libraries using this class. This internal method is automatically called by the load()
        method.

        Notes:
            The method does not verify all fields loaded from the configuration file and instead focuses on fields that
            do not have valid default values. Since these fields are expected to be frequently modified by users, they
            are the ones that require additional validation.

        Raises:
            ValueError: If the loaded data does not match expected formats or values.
        """

        # Verifies Google Sheet ID formatting. Google Sheet IDs are usually 44 characters long, containing letters,
        # numbers, hyphens, and underscores
        pattern = r"^[a-zA-Z0-9_-]{44}$"
        if not re.match(pattern, self.surgery_sheet_id):
            message = (
                f"Unable to verify the surgery_sheet_id field loaded from the 'project_configuration.yaml' file. "
                f"Expected a string with 44 characters, using letters, numbers, hyphens, and underscores, but found: "
                f"{self.surgery_sheet_id}."
            )
            console.error(message=message, error=ValueError)
        # Fixed copy-paste bug: this message previously named the surgery_sheet_id field while reporting a malformed
        # water_log_sheet_id, which would misdirect the user to edit the wrong field.
        if not re.match(pattern, self.water_log_sheet_id):
            message = (
                f"Unable to verify the water_log_sheet_id field loaded from the 'project_configuration.yaml' file. "
                f"Expected a string with 44 characters, using letters, numbers, hyphens, and underscores, but found: "
                f"{self.water_log_sheet_id}."
            )
            console.error(message=message, error=ValueError)
361
+
362
+
363
@dataclass()
class RawData:
    """Stores the paths to the directories and files that make up the 'raw_data' session-specific directory.

    The raw_data directory holds the data acquired during the session runtime, both before and after preprocessing.
    Since preprocessing does not alter the data, everything in that folder is considered 'raw'. The raw_data folder is
    initially created on the VRPC and, after preprocessing, is copied to the BioHPC server and the Synology NAS for
    long-term storage and further processing.
    """

    raw_data_path: Path = Path()
    """Stores the path to the root raw_data directory of the session. This directory stores all raw data during
    acquisition and preprocessing. Note, preprocessing does not alter raw data, so at any point in time all data inside
    the folder is considered 'raw'."""
    camera_data_path: Path = Path()
    """Stores the path to the directory that contains all camera data acquired during the session. Primarily, this
    includes .mp4 video files from each recorded camera."""
    mesoscope_data_path: Path = Path()
    """Stores the path to the directory that contains all Mesoscope data acquired during the session. Primarily, this
    includes the mesoscope-acquired .tiff files (brain activity data) and the motion estimation data."""
    behavior_data_path: Path = Path()
    """Stores the path to the directory that contains all behavior data acquired during the session. Primarily, this
    includes the .npz log files used by data-acquisition libraries to store all acquired data. The data stored in this
    way includes the camera and mesoscope frame timestamps and the states of Mesoscope-VR components, such as lick
    sensors, rotary encoders, and other modules."""
    zaber_positions_path: Path = Path()
    """Stores the path to the zaber_positions.yaml file. This file contains the snapshot of all Zaber motor positions
    at the end of the session. Zaber motors are used to position the LickPort and the HeadBar manipulators, which is
    essential for supporting proper brain imaging and animal's running behavior during the session."""
    session_descriptor_path: Path = Path()
    """Stores the path to the session_descriptor.yaml file. This file is partially filled by the system during runtime
    and partially by the experimenter after the runtime. It contains session-specific information, such as the specific
    training parameters, the positions of the Mesoscope objective and the notes made by the experimenter during
    runtime."""
    hardware_configuration_path: Path = Path()
    """Stores the path to the hardware_configuration.yaml file. This file contains the partial snapshot of the
    calibration parameters used by the Mesoscope-VR system components during runtime. Primarily, this is used during
    data processing to read the .npz data log files generated during runtime."""
    surgery_metadata_path: Path = Path()
    """Stores the path to the surgery_metadata.yaml file. This file contains the most actual information about the
    surgical intervention(s) performed on the animal prior to the session."""
    project_configuration_path: Path = Path()
    """Stores the path to the project_configuration.yaml file. This file contains the snapshot of the configuration
    parameters for the session's project."""
    session_data_path: Path = Path()
    """Stores the path to the session_data.yaml file. This path is used by the SessionData instance to save itself to
    disk as a .yaml file. The file contains all paths used during data acquisition and processing on both the VRPC and
    the BioHPC server."""
    experiment_configuration_path: Path = Path()
    """Stores the path to the experiment_configuration.yaml file. This file contains the snapshot of the
    experiment runtime configuration used by the session. This file is only created for experiment session. It does not
    exist for behavior training sessions."""
    mesoscope_positions_path: Path = Path()
    """Stores the path to the mesoscope_positions.yaml file. This file contains the snapshot of the positions used
    by the Mesoscope at the end of the session. This includes both the physical position of the mesoscope objective and
    the 'virtual' tip, tilt, and fastZ positions set via ScanImage software. This file is only created for experiment
    sessions that use the mesoscope, it is omitted for behavior training sessions."""
    window_screenshot_path: Path = Path()
    """Stores the path to the .png screenshot of the ScanImagePC screen. The screenshot should contain the image of the
    cranial window and the red-dot alignment windows. This is used to generate a visual snapshot of the cranial window
    alignment and appearance for each experiment session. This file is only created for experiment sessions that use
    the mesoscope, it is omitted for behavior training sessions."""
    telomere_path: Path = Path()
    """Stores the path to the telomere.bin file. This file is created by the data processing pipelines running on the
    BioHPC server to confirm that the raw_data transferred to the server was not altered or damaged in transmission."""
    checksum_path: Path = Path()
    """Stores the path to the ax_checksum.txt file. This file is generated as part of packaging the data for
    transmission and stores the xxHash-128 checksum of the data. It is used to verify that the transmission did not
    damage or otherwise alter the data."""

    def resolve_paths(self, root_directory_path: Path) -> None:
        """Resolves all paths managed by the class instance based on the input root directory path.

        This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
        machine that instantiates the class.

        Args:
            root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
                hierarchy, this has to point to a directory under the main /session, /animal, or /project directory of
                the managed session.
        """
        # The root itself becomes the raw_data directory; every other managed path is a direct child of it.
        self.raw_data_path = root_directory_path

        # Maps each managed attribute to the name of the file or subdirectory it points to under raw_data.
        leaf_names = {
            "camera_data_path": "camera_data",
            "mesoscope_data_path": "mesoscope_data",
            "behavior_data_path": "behavior_data",
            "zaber_positions_path": "zaber_positions.yaml",
            "session_descriptor_path": "session_descriptor.yaml",
            "hardware_configuration_path": "hardware_configuration.yaml",
            "surgery_metadata_path": "surgery_metadata.yaml",
            "project_configuration_path": "project_configuration.yaml",
            "session_data_path": "session_data.yaml",
            "experiment_configuration_path": "experiment_configuration.yaml",
            "mesoscope_positions_path": "mesoscope_positions.yaml",
            "window_screenshot_path": "window_screenshot.png",
            "telomere_path": "telomere.bin",
            "checksum_path": "ax_checksum.txt",
        }
        for attribute_name, leaf in leaf_names.items():
            setattr(self, attribute_name, root_directory_path.joinpath(leaf))

    def make_directories(self) -> None:
        """Ensures that all major subdirectories and the root directory exist."""
        for directory in (
            self.raw_data_path,
            self.camera_data_path,
            self.mesoscope_data_path,
            self.behavior_data_path,
        ):
            ensure_directory_exists(directory)
468
+
469
+
470
@dataclass()
class DeepLabCutData:
    """Stores the paths to the directories and files that make up the 'deeplabcut' project-specific directory.

    DeepLabCut (DLC) is used to track animal body parts and poses in video data acquired during experiment and
    training sessions. Since DLC operates on projects rather than on individual animals or sessions, each Sun lab
    project data hierarchy keeps a single dedicated 'deeplabcut' directory under the root project directory. The
    contents of that directory are largely managed by DLC itself, so every session of a given project refers to and
    reuses the same 'deeplabcut' directory.
    """

    deeplabcut_path: Path = Path()
    """Stores the path to the project-specific DeepLabCut directory. This folder stores all DeepLabCut data specific
    to a single project, which is reused during the processing of all sessions of the project."""

    def resolve_paths(self, root_directory_path: Path) -> None:
        """Resolves all paths managed by the class instance based on the input root directory path.

        This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
        machine that instantiates the class.

        Args:
            root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
                hierarchy, this has to point to a directory under the main /session, /animal, or /project directory
                of the managed session.
        """
        # This hierarchy consists of a single root directory; there are no derived sub-paths to resolve.
        self.deeplabcut_path = root_directory_path

    def make_directories(self) -> None:
        """Ensures that all major subdirectories and the root directory exist."""
        ensure_directory_exists(self.deeplabcut_path)
503
+
504
+
505
+ @dataclass()
506
+ class ConfigurationData:
507
+ """Stores the paths to the directories and files that make up the 'configuration' project-specific directory.
508
+
509
+ The configuration directory contains various configuration files and settings used by data acquisition,
510
+ preprocessing, and processing pipelines in the lab. Generally, all configuration settings are defined once for each
511
+ project and are reused for every session within the project. Therefore, this directory is created under each main
512
+ project directory.
513
+
514
+ Notes:
515
+ Some attribute names inside this section match the names in the RawData section. This is intentional, as some
516
+ configuration files are copied into the raw_data session directories to allow reinstating the session data
517
+ hierarchy across machines.
518
+ """
519
+
520
+ configuration_path: Path = Path()
521
+ """Stores the path to the project-specific configuration directory. This directory is used by all animals
522
+ and sessions of the project to store all pan-project configuration files. The configuration data is reused by all
523
+ sessions in the project."""
524
+ experiment_configuration_path: Path = Path()
525
+ """Stores the path to the experiment_configuration.yaml file. This file contains the snapshot of the
526
+ experiment runtime configuration used by the session. This file is only created for experiment session. It does not
527
+ exist for behavior training sessions."""
528
+ project_configuration_path: Path = Path()
529
+ """Stores the path to the project_configuration.yaml file. This file contains the snapshot of the configuration
530
+ parameters for the session's project."""
531
+ suite2p_configuration_path: Path = Path()
532
+ """Stores the path to the suite2p_configuration.yaml file stored inside the project's 'configuration' directory on
533
+ the fast BioHPC server volume. This configuration file specifies the parameters for the 'single day' suite2p
534
+ registration pipeline, which is applied to each session that generates brain activity data."""
535
+ multiday_configuration_path: Path = Path()
536
+ """Stores the path to the multiday_configuration.yaml file stored inside the project's 'configuration' directory
537
+ on the fast BioHPC server volume. This configuration file specifies the parameters for the 'multiday'
538
+ sl-suite2p-based registration pipelines used tot rack brain cells across multiple sessions."""
539
+
540
+ def resolve_paths(self, root_directory_path: Path, experiment_name: str | None = None) -> None:
541
+ """Resolves all paths managed by the class instance based on the input root directory path.
542
+
543
+ This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
544
+ machine that instantiates the class.
545
+
546
+ Args:
547
+ root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
548
+ hierarchy, this has to point to a directory under the main /session, /animal, or /project directory of
549
+ the managed session.
550
+ experiment_name: Optionally specifies the name of the experiment executed as part of the managed session's
551
+ runtime. This is used to correctly configure the path to the specific ExperimentConfiguration data file.
552
+ If the managed session is not an Experiment session, this parameter should be set to None.
553
+ """
554
+
555
+ # Generates the managed paths
556
+ self.configuration_path = root_directory_path
557
+ if experiment_name is None:
558
+ self.experiment_configuration_path = self.configuration_path.joinpath("null")
559
+ else:
560
+ self.experiment_configuration_path = self.configuration_path.joinpath(f"{experiment_name}.yaml")
561
+ self.project_configuration_path = self.configuration_path.joinpath("project_configuration.yaml")
562
+ self.suite2p_configuration_path = self.configuration_path.joinpath("suite2p_configuration.yaml")
563
+ self.multiday_configuration_path = self.configuration_path.joinpath("multiday_configuration.yaml")
564
+
565
+ def make_directories(self) -> None:
566
+ """Ensures that all major subdirectories and the root directory exist."""
567
+ ensure_directory_exists(self.configuration_path)
568
+
569
+
570
@dataclass()
class ProcessedData:
    """Stores the paths to the directories and files that make up the 'processed_data' session-specific directory.

    The processed_data directory stores the data generated by various processing pipelines from the raw data
    (contents of the raw_data directory). Processed data represents an intermediate step between raw data and the
    dataset used in the data analysis, but is not itself designed to be analyzed.

    Notes:
        The paths from this section are typically used only on the BioHPC server. This is because most data
        processing in the lab is performed using the processing server's resources. On the server, processed data is
        stored on the fast (NVME) drive volume, in contrast to raw data, which is stored on the slow (SSD) drive
        volume.

        When this class is instantiated on a machine other than BioHPC server, for example, to test processing
        pipelines, it uses the same drive as the raw_data folder to create the processed_data folder. This relies on
        the assumption that non-server machines in the lab only use fast NVME drives, so there is no need to
        separate storage and processing volumes.
    """

    processed_data_path: Path = Path()
    """Stores the path to the root processed_data directory of the session. This directory stores the processed data
    as it is generated by various data processing pipelines."""
    camera_data_path: Path = Path()
    """Stores the path to the directory that contains video tracking data generated by our DeepLabCut-based video
    processing pipelines."""
    mesoscope_data_path: Path = Path()
    """Stores path to the directory that contains processed brain activity (cell) data generated by our
    suite2p-based photometry processing pipelines (single day and multi day)."""
    behavior_data_path: Path = Path()
    """Stores the path to the directory that contains the non-video behavior and system runtime data extracted from
    .npz log files by our in-house log parsing pipeline."""
    job_logs_path: Path = Path()
    """Stores the path to the directory that stores the standard output and standard error data collected during
    server-side data processing pipeline runtimes. Since we use SLURM job manager to execute multiple compute jobs
    on the BioHPC server, all information sent to the terminal during runtime is redirected to text files stored in
    this directory."""
    project_configuration_path: Path = Path()
    """Stores the path to the project_configuration.yaml file. This file contains the snapshot of the configuration
    parameters for the session's project."""
    session_data_path: Path = Path()
    """Stores the path to the session_data.yaml file. This path is used by the SessionData instance to save itself
    to disk as a .yaml file. The file contains all paths used during data acquisition and processing on both the
    VRPC and the BioHPC server."""

    def resolve_paths(self, root_directory_path: Path) -> None:
        """Resolves all paths managed by the class instance based on the input root directory path.

        This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
        machine that instantiates the class.

        Args:
            root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
                hierarchy, this has to point to a directory under the main /session, /animal, or /project directory
                of the managed session.
        """
        self.processed_data_path = root_directory_path
        root = self.processed_data_path

        # Pipeline output subdirectories.
        self.camera_data_path = root / "camera_data"
        self.mesoscope_data_path = root / "mesoscope_data"
        self.behavior_data_path = root / "behavior_data"
        self.job_logs_path = root / "job_logs"

        # Configuration snapshot files stored directly under processed_data.
        self.project_configuration_path = root / "project_configuration.yaml"
        self.session_data_path = root / "session_data.yaml"

    def make_directories(self) -> None:
        """Ensures that all major subdirectories and the root directory exist."""
        # NOTE(review): mesoscope_data_path is not created here, unlike the other subdirectories — presumably the
        # suite2p pipeline creates it on demand. Confirm this omission is deliberate.
        for directory in (
            self.processed_data_path,
            self.camera_data_path,
            self.behavior_data_path,
            self.job_logs_path,
        ):
            ensure_directory_exists(directory)
641
+
642
+
643
@dataclass()
class VRPCPersistentData:
    """Stores the paths to the directories and files that make up the 'persistent_data' directory on the VRPC.

    Persistent data directories are only used during data acquisition. Therefore, unlike most other directories,
    they are purposefully designed for specific PCs that participate in data acquisition. This section manages the
    animal-specific persistent_data directory stored on the VRPC.

    The VRPC persistent data directory preserves configuration data, such as the positions of the Zaber motors and
    the Mesoscope objective, so that they can be reused across sessions of the same animal. The data in this
    directory is read at the beginning of each session and replaced at the end of each session.
    """

    persistent_data_path: Path = Path()
    """Stores the path to the project and animal specific 'persistent_data' directory to which the managed session
    belongs, relative to the VRPC root. This directory is exclusively used on the VRPC."""
    zaber_positions_path: Path = Path()
    """Stores the path to the Zaber motor positions snapshot generated at the end of the previous session runtime.
    This is used to automatically restore all Zaber motors to the same position across all sessions."""
    mesoscope_positions_path: Path = Path()
    """Stores the path to the Mesoscope positions snapshot generated at the end of the previous session runtime.
    This is used to help the user to (manually) restore the Mesoscope to the same position across all sessions."""

    def resolve_paths(self, root_directory_path: Path) -> None:
        """Resolves all paths managed by the class instance based on the input root directory path.

        This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
        machine that instantiates the class.

        Args:
            root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
                hierarchy, this has to point to a directory under the main /session, /animal, or /project directory
                of the managed session.
        """
        self.persistent_data_path = root_directory_path
        # Position snapshot files are stored directly under the persistent_data root.
        self.zaber_positions_path = self.persistent_data_path / "zaber_positions.yaml"
        self.mesoscope_positions_path = self.persistent_data_path / "mesoscope_positions.yaml"

    def make_directories(self) -> None:
        """Ensures that all major subdirectories and the root directory exist."""
        ensure_directory_exists(self.persistent_data_path)
687
+
688
+
689
@dataclass()
class ScanImagePCPersistentData:
    """Stores the paths to the directories and files that make up the 'persistent_data' directory on the
    ScanImagePC.

    Persistent data directories are only used during data acquisition. Therefore, unlike most other directories,
    they are purposefully designed for specific PCs that participate in data acquisition. This section manages the
    animal-specific persistent_data directory stored on the ScanImagePC (Mesoscope PC).

    The ScanImagePC persistent data directory preserves the motion estimation snapshot generated during the first
    experiment session. This is necessary to align the brain recording field of view across sessions. In turn, this
    is used to carry out 'online' motion and z-drift correction, improving the accuracy of across-day (multi-day)
    cell tracking.
    """

    persistent_data_path: Path = Path()
    """Stores the path to the project and animal specific 'persistent_data' directory to which the managed session
    belongs, relative to the ScanImagePC root. This directory is exclusively used on the ScanImagePC (Mesoscope
    PC)."""
    motion_estimator_path: Path = Path()
    """Stores the 'reference' motion estimator file generated during the first experiment session of each animal.
    This file is kept on the ScanImagePC to image the same population of cells across all experiment sessions."""

    def resolve_paths(self, root_directory_path: Path) -> None:
        """Resolves all paths managed by the class instance based on the input root directory path.

        This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
        machine that instantiates the class.

        Args:
            root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
                hierarchy, this has to point to a directory under the main /session, /animal, or /project directory
                of the managed session.
        """
        self.persistent_data_path = root_directory_path
        # The reference motion estimator snapshot lives directly under the persistent_data root.
        self.motion_estimator_path = self.persistent_data_path / "MotionEstimator.me"

    def make_directories(self) -> None:
        """Ensures that all major subdirectories and the root directory exist."""
        ensure_directory_exists(self.persistent_data_path)
730
+
731
+
732
@dataclass()
class MesoscopeData:
    """Stores the paths to the directories and files that make up the 'meso_data' directory on the ScanImagePC.

    The meso_data directory is the root directory where all mesoscope-generated data is stored on the ScanImagePC.
    The path to this directory should be given relative to the VRPC root and be mounted to the VRPC filesystem via
    the SMB or equivalent protocol.

    During runtime, the ScanImagePC should organize all collected data under this root directory. During
    preprocessing, the VRPC uses SMB to access the data in this directory and merge it into the 'raw_data' session
    directory. The paths in this section, therefore, are specific to the VRPC and are not used on other PCs.
    """

    meso_data_path: Path = Path()
    """Stores the path to the root ScanImagePC data directory, mounted to the VRPC filesystem via the SMB or
    equivalent protocol. All mesoscope-generated data is stored under this root directory before it is merged into
    the VRPC-managed raw_data directory of each session."""
    mesoscope_data_path: Path = Path()
    """Stores the path to the 'default' mesoscope_data directory. All experiment sessions across all animals and
    projects use the same mesoscope_data directory to save the data generated by the mesoscope via ScanImage
    software. This simplifies ScanImagePC configuration process during runtime, as all data is always saved in the
    same directory. During preprocessing, the data is moved from the default directory first into a
    session-specific ScanImagePC directory and then into the VRPC raw_data session directory."""
    session_specific_path: Path = Path()
    """Stores the path to the session-specific data directory. This directory is generated at the end of each
    experiment runtime to prepare mesoscope data for being moved to the VRPC-managed raw_data directory and to
    reset the 'default' mesoscope_data directory for the next session's runtime."""
    ubiquitin_path: Path = Path()
    """Stores the path to the 'ubiquitin.bin' file. This file is automatically generated inside the
    session-specific data directory after its contents are safely transferred to the VRPC as part of preprocessing.
    During redundant data removal step of preprocessing, the VRPC searches for directories marked with ubiquitin.bin
    and deletes them from the ScanImagePC filesystem."""

    def resolve_paths(self, root_mesoscope_path: Path, session_name: str) -> None:
        """Resolves all paths managed by the class instance based on the input root directory path.

        This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
        machine that instantiates the class.

        Args:
            root_mesoscope_path: The path to the top-level directory of the ScanImagePC data hierarchy mounted to
                the VRPC via the SMB or equivalent protocol.
            session_name: The name of the session for which this subclass is initialized.
        """
        self.meso_data_path = root_mesoscope_path

        # The shared 'default' acquisition directory used by every session.
        self.mesoscope_data_path = self.meso_data_path / "mesoscope_data"

        # The per-session staging directory and its transfer-completion marker.
        self.session_specific_path = self.meso_data_path / session_name
        self.ubiquitin_path = self.session_specific_path / "ubiquitin.bin"

    def make_directories(self) -> None:
        """Ensures that all major subdirectories and the root directory exist."""
        ensure_directory_exists(self.meso_data_path)
787
+
788
+
789
@dataclass()
class VRPCDestinations:
    """Stores the paths to the VRPC filesystem-mounted directories of the Synology NAS and BioHPC server.

    The paths from this section are primarily used to transfer preprocessed data to the long-term storage
    destinations. Additionally, they allow the VRPC to interface with the configuration directory of the BioHPC
    server to start data processing jobs and to read the data from the processed_data directory to remove redundant
    data from the VRPC filesystem.

    Overall, this section is intended solely for the VRPC and should not be used on other PCs.
    """

    nas_raw_data_path: Path = Path()
    """Stores the path to the session's raw_data directory on the Synology NAS, which is mounted to the VRPC via
    the SMB or equivalent protocol."""
    server_raw_data_path: Path = Path()
    """Stores the path to the session's raw_data directory on the BioHPC server, which is mounted to the VRPC via
    the SMB or equivalent protocol."""
    server_processed_data_path: Path = Path()
    """Stores the path to the session's processed_data directory on the BioHPC server, which is mounted to the VRPC
    via the SMB or equivalent protocol."""
    server_configuration_path: Path = Path()
    """Stores the path to the project-specific 'configuration' directory on the BioHPC server, which is mounted to
    the VRPC via the SMB or equivalent protocol."""
    telomere_path: Path = Path()
    """Stores the path to the session's telomere.bin marker. This marker is generated as part of data processing on
    the BioHPC server to notify the VRPC that the server received preprocessed data intact. The presence of this
    marker is used by the VRPC to determine which locally stored raw_data is safe to delete from the filesystem."""
    suite2p_configuration_path: Path = Path()
    """Stores the path to the suite2p_configuration.yaml file stored inside the project's 'configuration' directory
    on the BioHPC server. This configuration file specifies the parameters for the 'single day' sl-suite2p
    registration pipeline, which is applied to each session that generates brain activity data."""
    processing_tracker_path: Path = Path()
    """Stores the path to the processing_tracker.yaml file stored inside the sessions' root processed_data
    directory on the BioHPC server. This file tracks which processing pipelines need to be applied to the target
    session and the status (success / failure) of each applied pipeline.
    """
    multiday_configuration_path: Path = Path()
    """Stores the path to the multiday_configuration.yaml file stored inside the project's 'configuration'
    directory on the BioHPC server. This configuration file specifies the parameters for the 'multiday' sl-suite2p
    registration pipeline used to track brain cells across multiple sessions."""

    def resolve_paths(
        self,
        nas_raw_data_path: Path,
        server_raw_data_path: Path,
        server_processed_data_path: Path,
        server_configuration_path: Path,
    ) -> None:
        """Resolves all paths managed by the class instance based on the input root directory paths.

        This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
        machine that instantiates the class.

        Args:
            nas_raw_data_path: The path to the session's raw_data directory on the Synology NAS, relative to the
                VRPC filesystem root.
            server_raw_data_path: The path to the session's raw_data directory on the BioHPC server, relative to
                the VRPC filesystem root.
            server_processed_data_path: The path to the session's processed_data directory on the BioHPC server,
                relative to the VRPC filesystem root.
            server_configuration_path: The path to the project-specific 'configuration' directory on the BioHPC
                server, relative to the VRPC filesystem root.
        """
        # Root destination directories, taken directly from the arguments.
        self.nas_raw_data_path = nas_raw_data_path
        self.server_raw_data_path = server_raw_data_path
        self.server_processed_data_path = server_processed_data_path
        self.server_configuration_path = server_configuration_path

        # Marker, tracker, and configuration files derived from the roots resolved above.
        self.telomere_path = server_raw_data_path / "telomere.bin"
        self.processing_tracker_path = server_processed_data_path / "processing_tracker.yaml"
        self.suite2p_configuration_path = server_configuration_path / "suite2p_configuration.yaml"
        self.multiday_configuration_path = server_configuration_path / "multiday_configuration.yaml"

    def make_directories(self) -> None:
        """Ensures that all major subdirectories and the root directory exist."""
        for directory in (
            self.nas_raw_data_path,
            self.server_raw_data_path,
            self.server_configuration_path,
            self.server_processed_data_path,
        ):
            ensure_directory_exists(directory)
870
+
871
+
872
+ @dataclass
873
+ class SessionData(YamlConfig):
874
+ """Stores and manages the data layout of a single training or experiment session acquired using the Sun lab
875
+ Mesoscope-VR system.
876
+
877
+ The primary purpose of this class is to maintain the session data structure across all supported destinations and
878
+ during all processing stages. It generates the paths used by all other classes from all Sun lab libraries that
879
+ interact with the session's data from the point of its creation and until the data is integrated into an
880
+ analysis dataset.
881
+
882
+ When necessary, the class can be used to either generate a new session or load the layout of an already existing
883
+ session. When the class is used to create a new session, it generates the new session's name using the current
884
+ UTC timestamp, accurate to microseconds. This ensures that each session name is unique and preserves the overall
885
+ session order.
886
+
887
+ Notes:
888
+ If this class is instantiated on the VRPC, it is expected that the BioHPC server, Synology NAS, and ScanImagePC
889
+ data directories are mounted on the local filesystem via the SMB or equivalent protocol. All manipulations
890
+ with these destinations are carried out with the assumption that the local OS has full access to these
891
+ directories and filesystems.
892
+
893
+ This class is specifically designed for working with the data from a single session, performed by a single
894
+ animal under the specific experiment. The class is used to manage both raw and processed data. It follows the
895
+ data through acquisition, preprocessing and processing stages of the Sun lab data workflow. Together with
896
+ ProjectConfiguration class, this class serves as an entry point for all interactions with the managed session's
897
+ data.
898
+ """
899
+
900
+ project_name: str
901
+ """Stores the name of the managed session's project."""
902
+ animal_id: str
903
+ """Stores the unique identifier of the animal that participates in the managed session."""
904
+ session_name: str
905
+ """Stores the name (timestamp-based ID) of the managed session."""
906
+ session_type: str
907
+ """Stores the type of the session. Primarily, this determines how to read the session_descriptor.yaml file. Has
908
+ to be set to one of the four supported types: 'Lick training', 'Run training', 'Window checking' or 'Experiment'.
909
+ """
910
+ experiment_name: str | None
911
+ """Stores the name of the experiment configuration file. If the session_type field is set to 'Experiment' and this
912
+ field is not None (null), it communicates the specific experiment configuration used by the session. During runtime,
913
+ the name stored here is used to load the specific experiment configuration data stored in a .yaml file with the
914
+ same name. If the session is not an experiment session, this field is ignored."""
915
+ raw_data: RawData = field(default_factory=lambda: RawData())
916
+ """Stores the paths to all subfolders and files found under the /project/animal/session/raw_data directory of any
917
+ PC used to work with Sun lab data."""
918
+ processed_data: ProcessedData = field(default_factory=lambda: ProcessedData())
919
+ """Stores the paths to all subfolders and files found under the /project/animal/session/processed_data directory of
920
+ any PC used to work with Sun lab data."""
921
+ deeplabcut_data: DeepLabCutData = field(default_factory=lambda: DeepLabCutData())
922
+ """Stores the paths to all subfolders and files found under the /project/deeplabcut directory of any PC used to
923
+ work with Sun lab data."""
924
+ configuration_data: ConfigurationData = field(default_factory=lambda: ConfigurationData())
925
+ """Stores the paths to all subfolders and files found under the /project/configuration directory of any PC used to
926
+ work with Sun lab data."""
927
+ vrpc_persistent_data: VRPCPersistentData = field(default_factory=lambda: VRPCPersistentData())
928
+ """Stores the paths to all subfolders and files found under the /project/animal/persistent_data directory of
929
+ the VRPC used in the Sun lab to acquire behavior data."""
930
+ scanimagepc_persistent_data: ScanImagePCPersistentData = field(default_factory=lambda: ScanImagePCPersistentData())
931
+ """Stores the paths to all subfolders and files found under the /project/animal/persistent_data directory of
932
+ the ScanImagePC used in the Sun lab to acquire brain activity data."""
933
+ mesoscope_data: MesoscopeData = field(default_factory=lambda: MesoscopeData())
934
+ """Stores the paths to all subfolders and files found under the /meso_data (root mesoscope data) directory of
935
+ the ScanImagePC used in the Sun lab to acquire brain activity data."""
936
+ destinations: VRPCDestinations = field(default_factory=lambda: VRPCDestinations())
937
+ """Stores the paths to all subfolders and files under various VRPC-filesystem-mounted directories of other machines
938
+ used in the Sun lab for long-term data storage."""
939
+
940
+ @classmethod
941
+ def create(
942
+ cls,
943
+ animal_id: str,
944
+ session_type: str,
945
+ project_configuration: ProjectConfiguration,
946
+ experiment_name: str | None = None,
947
+ session_name: str | None = None,
948
+ ) -> "SessionData":
949
+ """Creates a new SessionData object and generates the new session's data structure.
950
+
951
+ This method is called by sl-experiment runtimes that create new training or experiment sessions to generate the
952
+ session data directory tree. It always assumes it is called on the VRPC and, as part of its runtime, resolves
953
+ and generates the necessary local and ScanImagePC directories to support acquiring and preprocessing session's
954
+ data.
955
+
956
+ Notes:
957
+ To load an already existing session data structure, use the load() method instead.
958
+
959
+ This method automatically dumps the data of the created SessionData instance into the session_data.yaml file
960
+ inside the root raw_data directory of the created hierarchy. It also finds and dumps other configuration
961
+ files, such as project_configuration.yaml and experiment_configuration.yaml, into the same raw_data
962
+ directory. This ensures that if the session's runtime is interrupted unexpectedly, the acquired data can
963
+ still be processed.
964
+
965
+ Args:
966
+ animal_id: The ID code of the animal for which the data is acquired.
967
+ session_type: The type of the session. Primarily, this determines how to read the session_descriptor.yaml
968
+ file. Valid options are 'Lick training', 'Run training', 'Window checking', or 'Experiment'.
969
+ experiment_name: The name of the experiment executed during managed session. This optional argument is only
970
+ used for 'Experiment' session types. It is used to find the experiment configuration .YAML file.
971
+ project_configuration: The initialized ProjectConfiguration instance that stores the session's project
972
+ configuration data. This is used to determine the root directory paths for all lab machines used during
973
+ data acquisition and processing.
974
+ session_name: An optional session_name override. Generally, this argument should not be provided for most
975
+ sessions. When provided, the method uses this name instead of generating a new timestamp-based name.
976
+ This is only used during the 'ascension' runtime to convert old data structures to the modern
977
+ lab standards.
978
+
979
+ Returns:
980
+ An initialized SessionData instance that stores the layout of the newly created session's data.
981
+ """
982
+
983
+ # Acquires the UTC timestamp to use as the session name
984
+ if session_name is None:
985
+ session_name = str(get_timestamp(time_separator="-"))
986
+
987
+ # Extracts the root directory paths stored inside the project configuration file. These roots are then used to
988
+ # initialize this class instance.
989
+ vrpc_root = Path(project_configuration.local_root_directory)
990
+ mesoscope_root = Path(project_configuration.local_mesoscope_directory)
991
+ biohpc_root = Path(project_configuration.local_server_directory)
992
+ biohpc_workdir = Path(project_configuration.local_server_working_directory)
993
+ nas_root = Path(project_configuration.local_nas_directory)
994
+
995
+ # Extracts the name of the project stored inside the project configuration file.
996
+ project_name = project_configuration.project_name
997
+
998
+ # Constructs the session directory path
999
+ session_path = vrpc_root.joinpath(project_name, animal_id, session_name)
1000
+
1001
+ # Handles potential session name conflicts
1002
+ counter = 0
1003
+ while session_path.exists():
1004
+ counter += 1
1005
+ new_session_name = f"{session_name}_{counter}"
1006
+ session_path = vrpc_root.joinpath(project_name, animal_id, new_session_name)
1007
+
1008
+ # If a conflict is detected and resolved, warns the user about the resolved conflict.
1009
+ if counter > 0:
1010
+ message = (
1011
+ f"Session name conflict occurred for animal '{animal_id}' of project '{project_name}' "
1012
+ f"when adding the new session with timestamp {session_name}. The session with identical name "
1013
+ f"already exists. The newly created session directory uses a '_{counter}' postfix to distinguish "
1014
+ f"itself from the already existing session directory."
1015
+ )
1016
+ console.echo(message=message, level=LogLevel.ERROR)
1017
+
1018
+ # Generates subclasses stored inside the main class instance based on the data resolved above. Note; most fields
1019
+ # of these classes are resolved automatically, based on one or more 'root' paths provided to the 'resolve_paths'
1020
+ # method.
1021
+ raw_data = RawData()
1022
+ raw_data.resolve_paths(root_directory_path=session_path.joinpath("raw_data"))
1023
+ raw_data.make_directories() # Generates the local directory tree
1024
+
1025
+ processed_data = ProcessedData()
1026
+ processed_data.resolve_paths(root_directory_path=session_path.joinpath("processed_data"))
1027
+ processed_data.make_directories()
1028
+
1029
+ dlc_data = DeepLabCutData()
1030
+ dlc_data.resolve_paths(root_directory_path=vrpc_root.joinpath(project_name, "deeplabcut"))
1031
+ dlc_data.make_directories()
1032
+
1033
+ configuration_data = ConfigurationData()
1034
+ configuration_data.resolve_paths(
1035
+ root_directory_path=vrpc_root.joinpath(project_name, "configuration"),
1036
+ experiment_name=experiment_name,
1037
+ )
1038
+ configuration_data.make_directories()
1039
+
1040
+ vrpc_persistent_data = VRPCPersistentData()
1041
+ vrpc_persistent_path = vrpc_root.joinpath(project_name, animal_id, "persistent_data")
1042
+ vrpc_persistent_data.resolve_paths(root_directory_path=vrpc_persistent_path)
1043
+ vrpc_persistent_data.make_directories()
1044
+
1045
+ scanimagepc_persistent_data = ScanImagePCPersistentData()
1046
+ scanimagepc_persistent_path = mesoscope_root.joinpath(project_name, animal_id, "persistent_data")
1047
+ scanimagepc_persistent_data.resolve_paths(root_directory_path=scanimagepc_persistent_path)
1048
+ scanimagepc_persistent_data.make_directories()
1049
+
1050
+ mesoscope_data = MesoscopeData()
1051
+ mesoscope_data.resolve_paths(root_mesoscope_path=mesoscope_root, session_name=session_name)
1052
+ mesoscope_data.make_directories()
1053
+
1054
+ destinations = VRPCDestinations()
1055
+ destinations.resolve_paths(
1056
+ nas_raw_data_path=nas_root.joinpath(project_name, animal_id, session_name, "raw_data"),
1057
+ server_raw_data_path=biohpc_root.joinpath(project_name, animal_id, session_name, "raw_data"),
1058
+ server_configuration_path=biohpc_root.joinpath(project_name, "configuration"),
1059
+ server_processed_data_path=biohpc_workdir.joinpath(project_name, "processed_data"),
1060
+ )
1061
+ destinations.make_directories()
1062
+
1063
+ # Packages the sections generated above into a SessionData instance
1064
+ instance = SessionData(
1065
+ project_name=project_configuration.project_name,
1066
+ animal_id=animal_id,
1067
+ session_name=session_name,
1068
+ session_type=session_type,
1069
+ raw_data=raw_data,
1070
+ deeplabcut_data=dlc_data,
1071
+ configuration_data=configuration_data,
1072
+ processed_data=processed_data,
1073
+ vrpc_persistent_data=vrpc_persistent_data,
1074
+ scanimagepc_persistent_data=scanimagepc_persistent_data,
1075
+ mesoscope_data=mesoscope_data,
1076
+ destinations=destinations,
1077
+ experiment_name=experiment_name,
1078
+ )
1079
+
1080
+ # Saves the configured instance data to the session's folder, so that it can be reused during processing or
1081
+ # preprocessing
1082
+ instance._save()
1083
+
1084
+ # Extracts and saves the necessary configuration classes to the session raw_data folder. Note, this list of
1085
+ # classes is not exhaustive. More classes are saved as part of the session runtime management class start() and
1086
+ # __init__() method runtimes:
1087
+
1088
+ # Discovers and saves the necessary configuration class instances to the raw_data and the processed_data folders
1089
+ # of the managed session:
1090
+ # Project Configuration
1091
+ sh.copy2(
1092
+ src=instance.configuration_data.project_configuration_path,
1093
+ dst=instance.raw_data.project_configuration_path,
1094
+ )
1095
+ sh.copy2(
1096
+ src=instance.configuration_data.project_configuration_path,
1097
+ dst=instance.processed_data.project_configuration_path,
1098
+ ) # ProjectConfiguration and SessionData are saved to both raw and processed data folders.
1099
+ # Experiment Configuration, if the session type is Experiment.
1100
+ if experiment_name is not None:
1101
+ sh.copy2(
1102
+ src=instance.configuration_data.experiment_configuration_path,
1103
+ dst=instance.raw_data.experiment_configuration_path,
1104
+ )
1105
+
1106
+ # Returns the initialized SessionData instance to caller
1107
+ return instance
1108
+
1109
+ @classmethod
1110
+ def load(
1111
+ cls,
1112
+ session_path: Path,
1113
+ on_server: bool,
1114
+ ) -> "SessionData":
1115
+ """Loads the SessionData instance from the target session's session_data.yaml file.
1116
+
1117
+ This method is used to load the data layout information of an already existing session. Primarily, this is used
1118
+ when preprocessing or processing session data. Depending on the call location (machine), the method
1119
+ automatically resolves all necessary paths and creates the necessary directories.
1120
+
1121
+ Notes:
1122
+ To create a new session, use the create() method instead.
1123
+
1124
+ Although session_data.yaml is stored both inside raw_data and processed_data subfolders, this method
1125
+ always searches only inside the raw_data folder. Storing session data in both folders is only used to ensure
1126
+ human experimenters can always trace all data in the lab back to the proper project, animal, and session.
1127
+
1128
+ Args:
1129
+ session_path: The path to the root directory of an existing session, e.g.: vrpc_root/project/animal/session.
1130
+ on_server: Determines whether the method is used to initialize an existing session on the BioHPC server or
1131
+ a non-server machine. Note, non-server runtimes use the same 'root' directory to store raw_data and
1132
+ processed_data subfolders. BioHPC server runtimes use different volumes (drives) to store these
1133
+ subfolders.
1134
+
1135
+ Returns:
1136
+ An initialized SessionData instance for the session whose data is stored at the provided path.
1137
+
1138
+ Raises:
1139
+ FileNotFoundError: If the 'session_data.yaml' file is not found under the session_path/raw_data/ subfolder.
1140
+ """
1141
+ # To properly initialize the SessionData instance, the provided path should contain the raw_data directory
1142
+ # with session_data.yaml file.
1143
+ session_data_path = session_path.joinpath("raw_data", "session_data.yaml")
1144
+ if not session_data_path.exists():
1145
+ message = (
1146
+ f"Unable to load the SessionData class for the target session: {session_path.stem}. No "
1147
+ f"session_data.yaml file was found inside the raw_data folder of the session. This likely "
1148
+ f"indicates that the session runtime was interrupted before recording any data, or that the "
1149
+ f"session path does not point to a valid session."
1150
+ )
1151
+ console.error(message=message, error=FileNotFoundError)
1152
+
1153
+ # Loads class data from .yaml file
1154
+ instance: SessionData = cls.from_yaml(file_path=session_data_path) # type: ignore
1155
+
1156
+ # The method assumes that the 'donor' .yaml file is always stored inside the raw_data directory of the session
1157
+ # to be processed. Since the directory itself might have moved (between or even within the same PC) relative to
1158
+ # where it was when the SessionData snapshot was generated, reconfigures the paths to all raw_data files using
1159
+ # the root from above.
1160
+ local_root = session_path.parents[2]
1161
+
1162
+ # RAW DATA
1163
+ new_root = local_root.joinpath(instance.project_name, instance.animal_id, instance.session_name, "raw_data")
1164
+ instance.raw_data.resolve_paths(root_directory_path=new_root)
1165
+
1166
+ # Uses the adjusted raw_data section to load the ProjectConfiguration instance. This is used below to resolve
1167
+ # all other SessionData sections, as it stores various required root directories.
1168
+ project_configuration: ProjectConfiguration = ProjectConfiguration.load(
1169
+ project_name=instance.project_name,
1170
+ configuration_path=Path(instance.raw_data.project_configuration_path),
1171
+ )
1172
+
1173
+ # Resolves the new roots for all sections that use the same root as the raw_data directory:
1174
+
1175
+ # CONFIGURATION
1176
+ new_root = local_root.joinpath(instance.project_name, "configuration")
1177
+ instance.configuration_data.resolve_paths(
1178
+ root_directory_path=new_root,
1179
+ experiment_name=instance.experiment_name,
1180
+ )
1181
+
1182
+ # DEEPLABCUT
1183
+ new_root = local_root.joinpath(instance.project_name, "deeplabcut")
1184
+ instance.deeplabcut_data.resolve_paths(root_directory_path=new_root)
1185
+
1186
+ # Resolves the roots for all VRPC-specific sections that use the data from the ProjectConfiguration instance:
1187
+
1188
+ # VRPC PERSISTENT DATA
1189
+ new_root = Path(project_configuration.local_root_directory).joinpath(
1190
+ instance.project_name, instance.animal_id, "persistent_data"
1191
+ )
1192
+ instance.vrpc_persistent_data.resolve_paths(root_directory_path=new_root)
1193
+
1194
+ # SCANIMAGEPC PERSISTENT DATA
1195
+ new_root = Path(project_configuration.local_mesoscope_directory).joinpath(
1196
+ instance.project_name, instance.animal_id, "persistent_data"
1197
+ )
1198
+ instance.scanimagepc_persistent_data.resolve_paths(root_directory_path=new_root)
1199
+
1200
+ # MESOSCOPE DATA
1201
+ instance.mesoscope_data.resolve_paths(
1202
+ root_mesoscope_path=Path(project_configuration.local_mesoscope_directory),
1203
+ session_name=instance.session_name,
1204
+ )
1205
+
1206
+ # DESTINATIONS
1207
+ instance.destinations.resolve_paths(
1208
+ nas_raw_data_path=Path(project_configuration.local_nas_directory).joinpath(
1209
+ instance.project_name, instance.animal_id, instance.session_name, "raw_data"
1210
+ ),
1211
+ server_raw_data_path=Path(project_configuration.local_server_directory).joinpath(
1212
+ instance.project_name, instance.animal_id, instance.session_name, "raw_data"
1213
+ ),
1214
+ server_configuration_path=Path(project_configuration.local_server_directory).joinpath(
1215
+ instance.project_name, "configuration"
1216
+ ),
1217
+ server_processed_data_path=Path(project_configuration.local_server_working_directory).joinpath(
1218
+ instance.project_name, instance.animal_id, instance.session_name, "processed_data"
1219
+ ),
1220
+ )
1221
+
1222
+ # Resolves the paths to the processed_data directories. The resolution strategy depends on whether the method is
1223
+ # called on the VRPC (locally) or the BioHPC server (remotely).
1224
+ if not on_server:
1225
+ # Local runtimes use the same root session directory for both raw_data and processed_data. This stems from
1226
+ # the assumption that most local machines in the lab only use NVME (fast) volumes and, therefore, do not
1227
+ # need to separate 'storage' and 'working' data directories.
1228
+ new_root = local_root # Reuses the local root for non-server runtimes
1229
+
1230
+ else:
1231
+ # The BioHPC server stores raw_data on slow volume and processed_data on fast (NVME) volume. Therefore, to
1232
+ # configure processed_data paths, the method first needs to load the fast volume root path from the
1233
+ # project_configuration.yaml file stored in the raw_data folder.
1234
+ new_root = Path(project_configuration.remote_working_directory)
1235
+
1236
+ # Regenerates the processed_data path depending on the root resolution above
1237
+ instance.processed_data.resolve_paths(
1238
+ root_directory_path=new_root.joinpath(
1239
+ instance.project_name, instance.animal_id, instance.session_name, "processed_data"
1240
+ )
1241
+ )
1242
+
1243
+ # Ensures that project configuration and session data classes are present in both raw_data and processed_data
1244
+ # directories. This ensures that all data of the session can always be traced to the parent project, animal,
1245
+ # and session.
1246
+ sh.copy2(
1247
+ src=instance.raw_data.session_data_path,
1248
+ dst=instance.processed_data.session_data_path,
1249
+ )
1250
+ sh.copy2(
1251
+ src=instance.raw_data.project_configuration_path,
1252
+ dst=instance.processed_data.project_configuration_path,
1253
+ )
1254
+
1255
+ # Generates data directory hierarchies that may be missing on the local machine
1256
+ instance.raw_data.make_directories()
1257
+ instance.configuration_data.make_directories()
1258
+ instance.deeplabcut_data.make_directories()
1259
+ instance.processed_data.make_directories()
1260
+
1261
+ # Returns the initialized SessionData instance to caller
1262
+ return instance
1263
+
1264
+ def _save(self) -> None:
1265
+ """Saves the instance data to the 'raw_data' directory and the 'processed_data' directory of the managed session
1266
+ as a 'session_data.yaml' file.
1267
+
1268
+ This is used to save the data stored in the instance to disk, so that it can be reused during preprocessing or
1269
+ data processing. The method is intended to only be used by the SessionData instance itself during its
1270
+ create() method runtime.
1271
+ """
1272
+
1273
+ # Saves instance data as a .YAML file
1274
+ self.to_yaml(file_path=self.raw_data.session_data_path)
1275
+ self.to_yaml(file_path=self.processed_data.session_data_path)