sl-shared-assets 1.0.0rc20__py3-none-any.whl → 1.0.0rc21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of sl-shared-assets might be problematic.

Files changed (35)
  1. sl_shared_assets/__init__.py +27 -27
  2. sl_shared_assets/__init__.pyi +24 -22
  3. sl_shared_assets/cli.py +266 -40
  4. sl_shared_assets/cli.pyi +73 -14
  5. sl_shared_assets/data_classes/__init__.py +23 -20
  6. sl_shared_assets/data_classes/__init__.pyi +18 -18
  7. sl_shared_assets/data_classes/configuration_data.py +407 -26
  8. sl_shared_assets/data_classes/configuration_data.pyi +172 -15
  9. sl_shared_assets/data_classes/runtime_data.py +49 -43
  10. sl_shared_assets/data_classes/runtime_data.pyi +37 -40
  11. sl_shared_assets/data_classes/session_data.py +168 -914
  12. sl_shared_assets/data_classes/session_data.pyi +55 -350
  13. sl_shared_assets/data_classes/surgery_data.py +3 -3
  14. sl_shared_assets/data_classes/surgery_data.pyi +2 -2
  15. sl_shared_assets/tools/__init__.py +8 -1
  16. sl_shared_assets/tools/__init__.pyi +11 -1
  17. sl_shared_assets/tools/ascension_tools.py +27 -26
  18. sl_shared_assets/tools/ascension_tools.pyi +5 -5
  19. sl_shared_assets/tools/packaging_tools.py +14 -1
  20. sl_shared_assets/tools/packaging_tools.pyi +4 -0
  21. sl_shared_assets/tools/project_management_tools.py +164 -0
  22. sl_shared_assets/tools/project_management_tools.pyi +48 -0
  23. {sl_shared_assets-1.0.0rc20.dist-info → sl_shared_assets-1.0.0rc21.dist-info}/METADATA +21 -4
  24. sl_shared_assets-1.0.0rc21.dist-info/RECORD +36 -0
  25. sl_shared_assets-1.0.0rc21.dist-info/entry_points.txt +8 -0
  26. sl_shared_assets/suite2p/__init__.py +0 -8
  27. sl_shared_assets/suite2p/__init__.pyi +0 -4
  28. sl_shared_assets/suite2p/multi_day.py +0 -224
  29. sl_shared_assets/suite2p/multi_day.pyi +0 -104
  30. sl_shared_assets/suite2p/single_day.py +0 -564
  31. sl_shared_assets/suite2p/single_day.pyi +0 -220
  32. sl_shared_assets-1.0.0rc20.dist-info/RECORD +0 -40
  33. sl_shared_assets-1.0.0rc20.dist-info/entry_points.txt +0 -4
  34. {sl_shared_assets-1.0.0rc20.dist-info → sl_shared_assets-1.0.0rc21.dist-info}/WHEEL +0 -0
  35. {sl_shared_assets-1.0.0rc20.dist-info → sl_shared_assets-1.0.0rc21.dist-info}/licenses/LICENSE +0 -0
@@ -11,40 +11,14 @@ import shutil as sh
  from pathlib import Path
  from dataclasses import field, dataclass

- import dacite
- import appdirs
  from ataraxis_base_utilities import LogLevel, console, ensure_directory_exists
  from ataraxis_data_structures import YamlConfig
  from ataraxis_time.time_helpers import get_timestamp

- from .configuration_data import ExperimentConfiguration
+ from .configuration_data import get_system_configuration_data

-
- def replace_root_path(path: Path) -> None:
- """Replaces the path to the local root directory used to store all Sun lab projects with the provided path.
-
- The first time ProjectConfiguration class is instantiated to create a new project on a new machine,
- it asks the user to provide the path to the local directory where to save all Sun lab projects. This path is then
- stored inside the default user data directory as a .yaml file to be reused for all future projects. To support
- replacing this path without searching for the user data directory, which is usually hidden, this function finds and
- updates the contents of the file that stores the local root path.
-
- Args:
- path: The path to the new local root directory.
- """
- # Resolves the path to the static .txt file used to store the local path to the root directory
- app_dir = Path(appdirs.user_data_dir(appname="sun_lab_data", appauthor="sun_lab"))
- path_file = app_dir.joinpath("root_path.txt")
-
- # In case this function is called before the app directory is created, ensures the app directory exists
- ensure_directory_exists(path_file)
-
- # Ensures that the input root directory exists
- ensure_directory_exists(path)
-
- # Replaces the contents of the root_path.txt file with the provided path
- with open(path_file, "w") as f:
- f.write(str(path))
+ # Stores all supported input for SessionData class 'session_type' fields.
+ _valid_session_types = {"lick training", "run training", "mesoscope experiment", "window checking"}


  @dataclass()
@@ -62,19 +36,11 @@ class ProjectConfiguration(YamlConfig):
  Together with SessionData, this class forms the entry point for all interactions with the data acquired in the
  Sun lab. The fields of this class are used to flexibly configure the runtime behavior of major data acquisition
  (sl-experiment) and processing (sl-forgery) libraries, adapting them for any project in the lab.
-
- Most lab projects only need to adjust the "surgery_sheet_id" and "water_log_sheet_id" fields of the class. Most
- fields in this class are used by the sl-experiment library to generate the SessionData class instance for each
- session and during experiment data acquisition and preprocessing. Data processing pipelines use specialized
- configuration files stored in other modules of this library.
-
- Although all path fields use str | Path datatype, they are always stored as Path objects. These fields are
- converted to strings only when the data is dumped as a .yaml file.
  """

  project_name: str = ""
  """Stores the descriptive name of the project. This name is used to create the root directory for the project and
- to discover and load project's data during runtime."""
+ to initialize SessionData instances each time any Sun lab library interacts with the session's data."""
  surgery_sheet_id: str = ""
  """The ID of the Google Sheet file that stores information about surgical interventions performed on all animals
  participating in the managed project. This log sheet is used to parse and write the surgical intervention data for
@@ -85,196 +51,36 @@ class ProjectConfiguration(YamlConfig):
  information for all animals participating in the managed project. This is used to synchronize the information
  inside the water restriction log with the state of the animal at the end of each training or experiment session.
  """
- google_credentials_path: str | Path = Path("/media/Data/Experiments/sl-surgery-log-0f651e492767.json")
- """
- The path to the locally stored .JSON file that contains the service account credentials used to read and write
- Google Sheet data. This is used to access and work with the surgery log and the water restriction log files.
- Usually, the same service account is used across all projects.
- """
- server_credentials_path: str | Path = Path("/media/Data/Experiments/server_credentials.yaml")
- """
- The path to the locally stored .YAML file that contains the credentials for accessing the BioHPC server machine.
- While the filesystem of the server machine should already be mounted to the local machine via SMB or equivalent
- protocol, this data is used to establish SSH connection to the server and start newly acquired data processing
- after it is transferred to the server. This allows data acquisition, preprocessing, and processing to be controlled
- by the same runtime and prevents unprocessed data from piling up on the server.
- """
- local_root_directory: str | Path = Path("/media/Data/Experiments")
- """The absolute path to the directory where all projects are stored on the local host-machine (VRPC). Note,
- this field is configured automatically each time the class is instantiated through any method, so overwriting it
- manually will not be respected."""
- local_server_directory: str | Path = Path("/home/cybermouse/server/storage/sun_data")
- """The absolute path to the directory where the raw data portion of all projects is stored on the BioHPC server.
- This directory should be locally accessible (mounted) using a network sharing protocol, such as SMB."""
- local_nas_directory: str | Path = Path("/home/cybermouse/nas/rawdata")
- """The absolute path to the directory where all projects are stored on the Synology NAS. This directory should be
- locally accessible (mounted) using a network sharing protocol, such as SMB."""
- local_mesoscope_directory: str | Path = Path("/home/cybermouse/scanimage/mesodata")
- """The absolute path to the root mesoscope (ScanImagePC) directory where all mesoscope-acquired data is aggregated
- during acquisition runtime. This directory should be locally accessible (mounted) using a network sharing
- protocol, such as SMB."""
- local_server_working_directory: str | Path = Path("/home/cybermouse/server/workdir/sun_data")
- """The absolute path to the directory where the processed data portion of all projects is stored on the BioHPC
- server. This directory should be locally accessible (mounted) using a network sharing protocol, such as SMB."""
- remote_storage_directory: str | Path = Path("/storage/sun_data")
- """The absolute path, relative to the BioHPC server root, to the directory where all projects are stored on the
- slow (SSD) volume of the server. This path is used when running remote (server-side) jobs and, therefore, has to
- be relative to the server root."""
- remote_working_directory: str | Path = Path("/workdir/sun_data")
- """The absolute path, relative to the BioHPC server root, to the directory where all projects are stored on the
- fast (NVME) volume of the server. This path is used when running remote (server-side) jobs and, therefore, has to
- be relative to the server root."""
- face_camera_index: int = 0
- """The index of the face camera in the list of all available Harvester-managed cameras."""
- left_camera_index: int = 0
- """The index of the left body camera in the list of all available OpenCV-managed cameras."""
- right_camera_index: int = 2
- """The index of the right body camera in the list of all available OpenCV-managed cameras."""
- harvesters_cti_path: str | Path = Path("/opt/mvIMPACT_Acquire/lib/x86_64/mvGenTLProducer.cti")
- """The path to the GeniCam CTI file used to connect to Harvesters-managed cameras."""
- actor_port: str = "/dev/ttyACM0"
- """The USB port used by the Actor Microcontroller."""
- sensor_port: str = "/dev/ttyACM1"
- """The USB port used by the Sensor Microcontroller."""
- encoder_port: str = "/dev/ttyACM2"
- """The USB port used by the Encoder Microcontroller."""
- headbar_port: str = "/dev/ttyUSB0"
- """The USB port used by the HeadBar Zaber motor controllers (devices)."""
- lickport_port: str = "/dev/ttyUSB1"
- """The USB port used by the LickPort Zaber motor controllers (devices)."""
- unity_ip: str = "127.0.0.1"
- """The IP address of the MQTT broker used to communicate with the Unity game engine. This is only used during
- experiment runtimes. Training runtimes ignore this parameter."""
- unity_port: int = 1883
- """The port number of the MQTT broker used to communicate with the Unity game engine. This is only used during
- experiment runtimes. Training runtimes ignore this parameter."""
- valve_calibration_data: dict[int | float, int | float] | tuple[tuple[int | float, int | float], ...] = (
- (15000, 1.8556),
- (30000, 3.4844),
- (45000, 7.1846),
- (60000, 10.0854),
- )
- """A tuple of tuples that maps water delivery solenoid valve open times, in microseconds, to the dispensed volume
- of water, in microliters. During training and experiment runtimes, this data is used by the ValveModule to translate
- the requested reward volumes into times the valve needs to be open to deliver the desired volume of water.
- """

  @classmethod
- def load(cls, project_name: str, configuration_path: None | Path = None) -> "ProjectConfiguration":
- """Loads the project configuration parameters from a project_configuration.yaml file.
+ def load(cls, configuration_path: Path) -> "ProjectConfiguration":
+ """Loads the project configuration parameters from the specified project_configuration.yaml file.

  This method is called during each interaction with any runtime session's data, including the creation of a new
- session. When this method is called for a non-existent (new) project name, it generates the default
- configuration file and prompts the user to update the configuration before proceeding with the runtime. All
- future interactions with the sessions from this project reuse the existing configuration file.
-
- Notes:
- As part of its runtime, the method may prompt the user to provide the path to the local root directory.
- This directory stores all project subdirectories and acts as the top level of the Sun lab data hierarchy.
- The path to the directory is then saved inside user's default data directory, so that it can be reused for
- all future projects. Use sl-replace-root CLI to replace the saved root directory path.
-
- Since this class is used for all Sun lab data structure interactions, this method supports multiple ways of
- loading class data. If this method is called as part of the sl-experiment new session creation pipeline, use
- 'project_name' argument. If this method is called as part of the sl-forgery data processing pipeline(s), use
- 'configuration_path' argument.
+ session.

  Args:
- project_name: The name of the project whose configuration file needs to be discovered and loaded or, if the
- project does not exist, created.
- configuration_path: Optional. The path to the project_configuration.yaml file from which to load the data.
- This way of resolving the configuration data source always takes precedence over the project_name when
- both are provided.
+ configuration_path: The path to the project_configuration.yaml file from which to load the data.

  Returns:
  The initialized ProjectConfiguration instance that stores the configuration data for the target project.
+
+ Raises:
+ FileNotFoundError: If the specified configuration file does not exist or is not a valid YAML file.
  """

- # If the configuration path is not provided, uses the 'default' resolution strategy that involves reading the
- # user's data directory
- if configuration_path is None:
- # Uses appdirs to locate the user data directory and resolve the path to the storage file
- app_dir = Path(appdirs.user_data_dir(appname="sl_assets", appauthor="sun_lab"))
- path_file = app_dir.joinpath("root_path.txt")
-
- # If the .txt file that stores the local root path does not exist, prompts the user to provide the path to
- # the local root directory and creates the root_path.txt file
- if not path_file.exists():
- # Gets the path to the local root directory from the user via command line input
- message = (
- "Unable to resolve the local root directory automatically. Provide the absolute path to the local "
- "directory that stores all project-specific directories. This is required when resolving project "
- "configuration based on project's name."
- )
- # noinspection PyTypeChecker
- console.echo(message=message, level=LogLevel.WARNING)
- root_path_str = input("Local root path: ")
- root_path = Path(root_path_str)
-
- # If necessary, generates the local root directory
- ensure_directory_exists(root_path)
-
- # Also ensures that the app directory exists, so that the path_file can be created below.
- ensure_directory_exists(path_file)
-
- # Saves the root path to the file
- with open(path_file, "w") as f:
- f.write(str(root_path))
-
- # Once the location of the path storage file is resolved, reads the root path from the file
- with open(path_file, "r") as f:
- root_path = Path(f.read().strip())
-
- # Uses the root experiment directory path to generate the path to the target project's configuration file.
- configuration_path = root_path.joinpath(project_name, "configuration", "project_configuration.yaml")
- ensure_directory_exists(configuration_path) # Ensures the directory tree for the config path exists.
-
- # If the configuration file does not exist (this is the first time this class is initialized for a given
- # project), generates a precursor (default) configuration file and prompts the user to update the configuration.
- if not configuration_path.exists():
+ # Prevents loading non-existent files.
+ if configuration_path.suffix != ".yaml" or not configuration_path.exists():
  message = (
- f"Unable to load project configuration data from disk as no 'project_configuration.yaml' file "
- f"found at the provided project path. Generating a precursor (default) configuration file under "
- f"{project_name}/configuration directory. Edit the file to specify project configuration before "
- f"proceeding further to avoid runtime errors. Also, edit other configuration precursors saved to the "
- f"same directory to control other aspects of data acquisition and processing."
+ f"Unable to load the project configuration data from the specified path: {configuration_path}. Valid "
+ f"configuration file paths should use the '.yaml' extension and point to an existing file."
  )
- # noinspection PyTypeChecker
- console.echo(message=message, level=LogLevel.WARNING)
-
- # Generates the default project configuration instance and dumps it as a .yaml file. Note, as part of
- # this process, the class generates the correct 'local_root_path' based on the path provided by the
- # user.
- precursor = ProjectConfiguration(local_root_directory=Path(str(configuration_path.parents[2])))
- precursor.project_name = project_name
- precursor.save(path=configuration_path)
-
- # Waits for the user to manually configure the newly created file.
- input(f"Enter anything to continue: ")
+ console.error(message=message, error=FileNotFoundError)

- # Loads the data from the YAML file and initializes the class instance. This now uses either the automatically
- # resolved configuration path or the manually provided path
+ # Loads the data from the YAML file and initializes the class instance.
  instance: ProjectConfiguration = cls.from_yaml(file_path=configuration_path) # type: ignore

- # Converts all paths loaded as strings to Path objects used inside the library
- instance.local_mesoscope_directory = Path(instance.local_mesoscope_directory)
- instance.local_nas_directory = Path(instance.local_nas_directory)
- instance.local_server_directory = Path(instance.local_server_directory)
- instance.local_server_working_directory = Path(instance.local_server_working_directory)
- instance.remote_storage_directory = Path(instance.remote_storage_directory)
- instance.remote_working_directory = Path(instance.remote_working_directory)
- instance.google_credentials_path = Path(instance.google_credentials_path)
- instance.server_credentials_path = Path(instance.server_credentials_path)
- instance.harvesters_cti_path = Path(instance.harvesters_cti_path)
-
- # Local root path is always re-computed from the resolved configuration file's location
- instance.local_root_directory = Path(str(configuration_path.parents[2]))
-
- # Converts valve_calibration data from dictionary to a tuple of tuples format
- if not isinstance(instance.valve_calibration_data, tuple):
- instance.valve_calibration_data = tuple((k, v) for k, v in instance.valve_calibration_data.items())
-
- # Partially verifies the loaded data. Most importantly, this step does not allow proceeding if the user did not
+ # Verifies the loaded data. Most importantly, this step does not allow proceeding if the user did not
  # replace the surgery log and water restriction log placeholders with valid ID values.
  instance._verify_data()
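
For orientation, a minimal usage sketch of the new load() signature (the import location and the example path are illustrative assumptions, not taken from this diff):

    from pathlib import Path

    from sl_shared_assets import ProjectConfiguration  # assumed export location

    # Hypothetical configuration file location inside a project's 'configuration' directory.
    config_path = Path("/media/Data/Experiments/my_project/configuration/project_configuration.yaml")

    # load() now requires an explicit path; per the new code, it raises FileNotFoundError when the
    # path does not end in '.yaml' or the file does not exist, instead of generating a precursor file.
    project_configuration = ProjectConfiguration.load(configuration_path=config_path)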
 
@@ -284,46 +90,16 @@ class ProjectConfiguration(YamlConfig):
  def save(self, path: Path) -> None:
  """Saves class instance data to disk as a project_configuration.yaml file.

- This method is automatically called when a new project is created. After this method's runtime, all future
- calls to the load() method will reuse the configuration data saved to the .yaml file.
-
- Notes:
- When this method is used to generate the configuration .yaml file for a new project, it also generates the
- example 'default_experiment.yaml'. This file is designed to showcase how to write ExperimentConfiguration
- data files that are used to control Mesoscope-VR system states during experiment session runtimes.
+ This method is automatically called from the 'sl_experiment' library when a new project is created. After this
+ method's runtime, all future project initialization calls will use the load() method to reuse configuration data
+ saved to the .yaml file created by this method.

  Args:
  path: The path to the .yaml file to save the data to.
  """

- # Copies instance data to prevent it from being modified by reference when executing the steps below
- original = copy.deepcopy(self)
-
- # Converts all Path objects to strings before dumping the data, as .yaml encoder does not properly recognize
- # Path objects
- original.local_root_directory = str(original.local_root_directory)
- original.local_mesoscope_directory = str(original.local_mesoscope_directory)
- original.local_nas_directory = str(original.local_nas_directory)
- original.local_server_directory = str(original.local_server_directory)
- original.local_server_working_directory = str(original.local_server_working_directory)
- original.remote_storage_directory = str(original.remote_storage_directory)
- original.remote_working_directory = str(original.remote_working_directory)
- original.google_credentials_path = str(original.google_credentials_path)
- original.server_credentials_path = str(original.server_credentials_path)
- original.harvesters_cti_path = str(original.harvesters_cti_path)
-
- # Converts valve calibration data into dictionary format
- if isinstance(original.valve_calibration_data, tuple):
- original.valve_calibration_data = {k: v for k, v in original.valve_calibration_data}
-
  # Saves the data to the YAML file
- original.to_yaml(file_path=path)
-
- # As part of this runtime, also generates and dumps the 'precursor' experiment configuration file.
- experiment_configuration_path = path.parent.joinpath("default_experiment.yaml")
- if not experiment_configuration_path.exists():
- example_experiment = ExperimentConfiguration()
- example_experiment.to_yaml(experiment_configuration_path)
+ self.to_yaml(file_path=path)
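
A short round-trip sketch of the simplified save() behavior, reusing the hypothetical config_path from the sketch above (field values are placeholders, not values from this diff):

    new_configuration = ProjectConfiguration(
        project_name="my_project",
        surgery_sheet_id="<google-sheet-id>",
        water_log_sheet_id="<google-sheet-id>",
    )
    # save() now serializes the instance directly via YamlConfig.to_yaml(), with no Path-to-string
    # conversion step and no 'default_experiment.yaml' precursor generation.
    new_configuration.save(path=config_path)
    restored = ProjectConfiguration.load(configuration_path=config_path)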

  def _verify_data(self) -> None:
  """Verifies the user-modified data loaded from the project_configuration.yaml file.
@@ -333,11 +109,6 @@ class ProjectConfiguration(YamlConfig):
  runtime behavior of the libraries using this class. This internal method is automatically called by the load()
  method.

- Notes:
- The method does not verify all fields loaded from the configuration file and instead focuses on fields that
- do not have valid default values. Since these fields are expected to be frequently modified by users, they
- are the ones that require additional validation.
-
  Raises:
  ValueError: If the loaded data does not match expected formats or values.
  """
@@ -366,9 +137,12 @@ class RawData:
  """Stores the paths to the directories and files that make up the 'raw_data' session-specific directory.

  The raw_data directory stores the data acquired during the session runtime before and after preprocessing. Since
- preprocessing does not alter the data, any data in that folder is considered 'raw'. The raw_data folder is initially
- created on the VRPC and, after preprocessing, is copied to the BioHPC server and the Synology NAS for long-term
- storage and further processing.
+ preprocessing does not alter the data, any data in that folder is considered 'raw'.
+
+ Notes:
+ Sun lab data management strategy primarily relies on keeping multiple redundant copies of the raw_data for
+ each acquired session. Typically, one copy is stored on the lab's processing server and the other is stored on
+ the NAS.
  """

  raw_data_path: Path = Path()
@@ -380,25 +154,28 @@ class RawData:
  includes .mp4 video files from each recorded camera."""
  mesoscope_data_path: Path = Path()
  """Stores the path to the directory that contains all Mesoscope data acquired during the session. Primarily, this
- includes the mesoscope-acquired .tiff files (brain activity data) and the motion estimation data."""
+ includes the mesoscope-acquired .tiff files (brain activity data) and the motion estimation data. This directory is
+ created for all sessions, but is only used (filled) by the sessions that use the Mesoscope-VR system to acquire
+ brain activity data."""
  behavior_data_path: Path = Path()
- """Stores the path to the directory that contains all behavior data acquired during the session. Primarily, this
- includes the .npz log files used by data-acquisition libraries to store all acquired data. The data stored in this
- way includes the camera and mesoscope frame timestamps and the states of Mesoscope-VR components, such as lick
- sensors, rotary encoders, and other modules."""
+ """Stores the path to the directory that contains all non-video behavior data acquired during the session.
+ Primarily, this includes the .npz log files that store serialized data acquired by all hardware components of the
+ data acquisition system other than cameras and brain activity data acquisition devices (such as the Mesoscope).
+ The reason why the directory is called 'behavior' is primarily because all .npz files are parsed to infer the
+ behavior of the animal, in contrast to brain (cell) activity data."""
  zaber_positions_path: Path = Path()
  """Stores the path to the zaber_positions.yaml file. This file contains the snapshot of all Zaber motor positions
  at the end of the session. Zaber motors are used to position the LickPort and the HeadBar manipulators, which is
- essential for supporting proper brain imaging and animal's running behavior during the session."""
+ essential for supporting proper brain imaging and animal's running behavior during the session. This file is only
+ created for sessions that use the Mesoscope-VR system."""
  session_descriptor_path: Path = Path()
  """Stores the path to the session_descriptor.yaml file. This file is partially filled by the system during runtime
  and partially by the experimenter after the runtime. It contains session-specific information, such as the specific
- training parameters, the positions of the Mesoscope objective and the notes made by the experimenter during
- runtime."""
- hardware_configuration_path: Path = Path()
- """Stores the path to the hardware_configuration.yaml file. This file contains the partial snapshot of the
- calibration parameters used by the Mesoscope-VR system components during runtime. Primarily, this is used during
- data processing to read the .npz data log files generated during runtime."""
+ task parameters and the notes made by the experimenter during runtime."""
+ hardware_state_path: Path = Path()
+ """Stores the path to the hardware_state.yaml file. This file contains the partial snapshot of the calibration
+ parameters used by the data acquisition and runtime management system modules during the session. Primarily,
+ this is used during data processing to read the .npz data log files generated during runtime."""
  surgery_metadata_path: Path = Path()
  """Stores the path to the surgery_metadata.yaml file. This file contains the most actual information about the
  surgical intervention(s) performed on the animal prior to the session."""
@@ -407,29 +184,39 @@ class RawData:
  parameters for the session's project."""
  session_data_path: Path = Path()
  """Stores the path to the session_data.yaml file. This path is used by the SessionData instance to save itself to
- disk as a .yaml file. The file contains all paths used during data acquisition and processing on both the VRPC and
- the BioHPC server."""
+ disk as a .yaml file. The file contains the paths to all raw and processed data directories used during data
+ acquisition or processing runtime."""
  experiment_configuration_path: Path = Path()
  """Stores the path to the experiment_configuration.yaml file. This file contains the snapshot of the
- experiment runtime configuration used by the session. This file is only created for experiment session. It does not
- exist for behavior training sessions."""
+ experiment runtime configuration used by the session. This file is only created for experiment sessions."""
  mesoscope_positions_path: Path = Path()
  """Stores the path to the mesoscope_positions.yaml file. This file contains the snapshot of the positions used
  by the Mesoscope at the end of the session. This includes both the physical position of the mesoscope objective and
- the 'virtual' tip, tilt, and fastZ positions set via ScanImage software. This file is only created for experiment
- sessions that use the mesoscope, it is omitted for behavior training sessions."""
+ the 'virtual' tip, tilt, and fastZ positions set via ScanImage software. This file is only created for sessions that
+ use the Mesoscope-VR system to acquire brain activity data."""
  window_screenshot_path: Path = Path()
  """Stores the path to the .png screenshot of the ScanImagePC screen. The screenshot should contain the image of the
  cranial window and the red-dot alignment windows. This is used to generate a visual snapshot of the cranial window
- alignment and appearance for each experiment session. This file is only created for experiment sessions that use
- the mesoscope, it is omitted for behavior training sessions."""
- telomere_path: Path = Path()
- """Stores the path to the telomere.bin file. This file is created by the data processing pipelines running on the
- BioHPC server to confirm that the raw_data transferred to the server was not altered or damaged in transmission."""
+ alignment and appearance for each experiment session. This file is only created for sessions that use the
+ Mesoscope-VR system to acquire brain activity data."""
+ system_configuration_path: Path = Path()
+ """Stores the path to the system_configuration.yaml file. This file contains the exact snapshot of the data
+ acquisition and runtime management system configuration parameters used to acquire session data."""
  checksum_path: Path = Path()
  """Stores the path to the ax_checksum.txt file. This file is generated as part of packaging the data for
  transmission and stores the xxHash-128 checksum of the data. It is used to verify that the transmission did not
  damage or otherwise alter the data."""
+ telomere_path: Path = Path()
+ """Stores the path to the telomere.bin file. This file is statically generated at the end of the session's data
+ acquisition based on experimenter feedback to mark sessions that ran in-full with no issues. Sessions without a
+ telomere.bin file are considered 'incomplete' and are excluded from all automated processing, as they may contain
+ corrupted, incomplete, or otherwise unusable data."""
+ ubiquitin_path: Path = Path()
+ """Stores the path to the ubiquitin.bin file. This file is primarily used by the sl-experiment libraries to mark
+ local session data directories for deletion (purging). Typically, it is created once the data is safely moved to
+ the long-term storage destinations (NAS and Server) and the integrity of the moved data is verified on at least one
+ destination. During 'purge' sl-experiment runtimes, the library discovers and removes all session data marked with
+ 'ubiquitin.bin' files from the machine that runs the code."""

  def resolve_paths(self, root_directory_path: Path) -> None:
  """Resolves all paths managed by the class instance based on the input root directory path.
@@ -450,124 +237,26 @@ class RawData:
  self.behavior_data_path = self.raw_data_path.joinpath("behavior_data")
  self.zaber_positions_path = self.raw_data_path.joinpath("zaber_positions.yaml")
  self.session_descriptor_path = self.raw_data_path.joinpath("session_descriptor.yaml")
- self.hardware_configuration_path = self.raw_data_path.joinpath("hardware_configuration.yaml")
+ self.hardware_state_path = self.raw_data_path.joinpath("hardware_state.yaml")
  self.surgery_metadata_path = self.raw_data_path.joinpath("surgery_metadata.yaml")
  self.project_configuration_path = self.raw_data_path.joinpath("project_configuration.yaml")
  self.session_data_path = self.raw_data_path.joinpath("session_data.yaml")
  self.experiment_configuration_path = self.raw_data_path.joinpath("experiment_configuration.yaml")
  self.mesoscope_positions_path = self.raw_data_path.joinpath("mesoscope_positions.yaml")
  self.window_screenshot_path = self.raw_data_path.joinpath("window_screenshot.png")
- self.telomere_path = self.raw_data_path.joinpath("telomere.bin")
  self.checksum_path = self.raw_data_path.joinpath("ax_checksum.txt")
+ self.system_configuration_path = self.raw_data_path.joinpath("system_configuration.yaml")
+ self.telomere_path = self.raw_data_path.joinpath("telomere.bin")
+ self.ubiquitin_path = self.raw_data_path.joinpath("ubiquitin.bin")

  def make_directories(self) -> None:
- """Ensures that all major subdirectories and the root directory exist."""
+ """Ensures that all major subdirectories and the root directory exist, creating any missing directories."""
  ensure_directory_exists(self.raw_data_path)
  ensure_directory_exists(self.camera_data_path)
  ensure_directory_exists(self.mesoscope_data_path)
  ensure_directory_exists(self.behavior_data_path)
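
A minimal sketch of how the RawData path container appears to be used, assuming the import location below and that root_directory_path points at the session's raw_data directory (the session root is a hypothetical example):

    from pathlib import Path

    from sl_shared_assets import RawData  # assumed export location

    session_root = Path("/media/Data/Experiments/my_project/animal_01/2024-01-01-10-00-00")  # hypothetical
    raw_data = RawData()
    raw_data.resolve_paths(root_directory_path=session_root.joinpath("raw_data"))
    raw_data.make_directories()  # creates raw_data plus the camera, mesoscope, and behavior subdirectories
    print(raw_data.hardware_state_path)  # .../raw_data/hardware_state.yaml under the new naming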


- @dataclass()
- class DeepLabCutData:
- """Stores the paths to the directories and files that make up the 'deeplabcut' project-specific directory.
-
- DeepLabCut (DLC) is used to track animal body parts and poses in video data acquired during experiment and training
- sessions. Since DLC is designed to work with projects, rather than single animals or sessions, each Sun lab
- project data hierarchy contains a dedicated 'deeplabcut' directory under the root project directory. The contents of
- that directory are largely managed by the DLC itself. Therefore, each session of a given project refers to and
- uses the same 'deeplabcut' directory.
- """
-
- deeplabcut_path: Path = Path()
- """Stores the path to the project-specific DeepLabCut directory. This folder stores all DeepLabCut data specific to
- a single project, which is reused during the processing of all sessions of the project."""
-
- def resolve_paths(self, root_directory_path: Path) -> None:
- """Resolves all paths managed by the class instance based on the input root directory path.
-
- This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
- machine that instantiates the class.
-
- Args:
- root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
- hierarchy, this has to point to a directory under the main /session, /animal, or /project directory of
- the managed session.
- """
-
- # Generates the managed paths
- self.deeplabcut_path = root_directory_path
-
- def make_directories(self) -> None:
- """Ensures that all major subdirectories and the root directory exist."""
- ensure_directory_exists(self.deeplabcut_path)
-
-
- @dataclass()
- class ConfigurationData:
- """Stores the paths to the directories and files that make up the 'configuration' project-specific directory.
-
- The configuration directory contains various configuration files and settings used by data acquisition,
- preprocessing, and processing pipelines in the lab. Generally, all configuration settings are defined once for each
- project and are reused for every session within the project. Therefore, this directory is created under each main
- project directory.
-
- Notes:
- Some attribute names inside this section match the names in the RawData section. This is intentional, as some
- configuration files are copied into the raw_data session directories to allow reinstating the session data
- hierarchy across machines.
- """
-
- configuration_path: Path = Path()
- """Stores the path to the project-specific configuration directory. This directory is used by all animals
- and sessions of the project to store all pan-project configuration files. The configuration data is reused by all
- sessions in the project."""
- experiment_configuration_path: Path = Path()
- """Stores the path to the experiment_configuration.yaml file. This file contains the snapshot of the
- experiment runtime configuration used by the session. This file is only created for experiment session. It does not
- exist for behavior training sessions."""
- project_configuration_path: Path = Path()
- """Stores the path to the project_configuration.yaml file. This file contains the snapshot of the configuration
- parameters for the session's project."""
- single_day_s2p_configuration_path: Path = Path()
- """Stores the path to the single_day_s2p_configuration.yaml file stored inside the project's 'configuration'
- directory on the fast BioHPC server volume. This configuration file specifies the parameters for the 'single day'
- suite2p registration pipeline, which is applied to each session that generates brain activity data."""
- multi_day_s2p_configuration_path: Path = Path()
- """Stores the path to the multi_day_s2p_configuration.yaml file stored inside the project's 'configuration'
- directory on the fast BioHPC server volume. This configuration file specifies the parameters for the 'multiday'
- sl-suite2p-based registration pipelines used to track brain cells across multiple sessions."""
-
- def resolve_paths(self, root_directory_path: Path, experiment_name: str | None = None) -> None:
- """Resolves all paths managed by the class instance based on the input root directory path.
-
- This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
- machine that instantiates the class.
-
- Args:
- root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
- hierarchy, this has to point to a directory under the main /session, /animal, or /project directory of
- the managed session.
- experiment_name: Optionally specifies the name of the experiment executed as part of the managed session's
- runtime. This is used to correctly configure the path to the specific ExperimentConfiguration data file.
- If the managed session is not an Experiment session, this parameter should be set to None.
- """
-
- # Generates the managed paths
- self.configuration_path = root_directory_path
- if experiment_name is None:
- self.experiment_configuration_path = self.configuration_path.joinpath("null")
- else:
- self.experiment_configuration_path = self.configuration_path.joinpath(f"{experiment_name}.yaml")
- self.project_configuration_path = self.configuration_path.joinpath("project_configuration.yaml")
- self.single_day_s2p_configuration_path = self.configuration_path.joinpath("single_day_s2p_configuration.yaml")
- self.multi_day_s2p_configuration_path = self.configuration_path.joinpath("multi_day_s2p_configuration.yaml")
-
- def make_directories(self) -> None:
- """Ensures that all major subdirectories and the root directory exist."""
- ensure_directory_exists(self.configuration_path)
-
-
  @dataclass()
  class ProcessedData:
  """Stores the paths to the directories and files that make up the 'processed_data' session-specific directory.
@@ -575,16 +264,6 @@ class ProcessedData:
  The processed_data directory stores the data generated by various processing pipelines from the raw data (contents
  of the raw_data directory). Processed data represents an intermediate step between raw data and the dataset used in
  the data analysis, but is not itself designed to be analyzed.
-
- Notes:
- The paths from this section are typically used only on the BioHPC server. This is because most data processing
- in the lab is performed using the processing server's resources. On the server, processed data is stored on
- the fast (NVME) drive volume, in contrast to raw data, which is stored on the slow (SSD) drive volume.
-
- When this class is instantiated on a machine other than BioHPC server, for example, to test processing
- pipelines, it uses the same drive as the raw_data folder to create the processed_data folder. This relies on the
- assumption that non-server machines in the lab only use fast NVME drives, so there is no need to separate
- storage and processing volumes.
  """

  processed_data_path: Path = Path()
@@ -595,22 +274,34 @@ class ProcessedData:
  processing pipelines."""
  mesoscope_data_path: Path = Path()
  """Stores path to the directory that contains processed brain activity (cell) data generated by our suite2p-based
- photometry processing pipelines (single day and multi day)."""
+ photometry processing pipelines (single-day and multi-day). This directory is only used by sessions acquired with
+ the Mesoscope-VR system. For all other sessions, it will be created, but kept empty."""
  behavior_data_path: Path = Path()
- """Stores the path to the directory that contains the non-video behavior and system runtime data extracted from
+ """Stores the path to the directory that contains the non-video and non-brain-activity data extracted from
  .npz log files by our in-house log parsing pipeline."""
  job_logs_path: Path = Path()
  """Stores the path to the directory that stores the standard output and standard error data collected during
- server-side data processing pipeline runtimes. Since we use SLURM job manager to execute multiple compute jobs on
- the BioHPC server, all information sent to the terminal during runtime is redirected to text files stored in this
- directory."""
- project_configuration_path: Path = Path()
- """Stores the path to the project_configuration.yaml file. This file contains the snapshot of the configuration
- parameters for the session's project."""
- session_data_path: Path = Path()
- """Stores the path to the session_data.yaml file. This path is used by the SessionData instance to save itself to
- disk as a .yaml file. The file contains all paths used during data acquisition and processing on both the VRPC and
- the BioHPC server."""
+ server-side data processing pipeline runtimes. This directory is primarily used when running data processing jobs
+ on the remote server. However, it is possible to configure local runtimes to also redirect log data to files
+ stored in this directory (by editing ataraxis-base-utilities 'console' variable)."""
+ single_day_suite2p_bin_path: Path = Path()
+ """Stores the path to the single_day_suite2p.bin file. This file is created by our single-day suite2p data
+ processing pipeline to mark sessions that have been successfully processed with the single-day sl-suite2p library
+ pipeline. Note, the file is removed at the beginning of the suite2p pipeline, so its presence always indicates
+ successful processing runtime completion."""
+ multi_day_suite2p_bin_path: Path = Path()
+ """Same as single_day_suite2p_bin_path, but tracks whether the session has been successfully processed with the
+ multi-day suite2p pipeline."""
+ behavior_bin_path: Path = Path()
+ """Stores the path to the behavior.bin file. This file is created by our behavior data extraction pipeline
+ to mark sessions that have been successfully processed with the sl-behavior library pipeline. Note, the
+ file is removed at the beginning of the behavior data extraction pipeline, so its presence always indicates
+ successful processing runtime completion."""
+ dlc_bin_path: Path = Path()
+ """Stores the path to the dlc.bin file. This file is created by our DeepLabCut-based pose tracking pipeline
+ to mark sessions that have been successfully processed with the sl-dlc library pipeline. Note, the
+ file is removed at the beginning of the DeepLabCut pipeline, so its presence always indicates successful processing
+ runtime completion."""

  def resolve_paths(self, root_directory_path: Path) -> None:
  """Resolves all paths managed by the class instance based on the input root directory path.
@@ -629,11 +320,13 @@ class ProcessedData:
  self.mesoscope_data_path = self.processed_data_path.joinpath("mesoscope_data")
  self.behavior_data_path = self.processed_data_path.joinpath("behavior_data")
  self.job_logs_path = self.processed_data_path.joinpath("job_logs")
- self.project_configuration_path = self.processed_data_path.joinpath("project_configuration.yaml")
- self.session_data_path = self.processed_data_path.joinpath("session_data.yaml")
+ self.single_day_suite2p_bin_path = self.processed_data_path.joinpath("single_day_suite2p.bin")
+ self.multi_day_suite2p_bin_path = self.processed_data_path.joinpath("multi_day_suite2p.bin")
+ self.behavior_bin_path = self.processed_data_path.joinpath("behavior.bin")
+ self.dlc_bin_path = self.processed_data_path.joinpath("dlc.bin")

  def make_directories(self) -> None:
- """Ensures that all major subdirectories and the root directory exist."""
+ """Ensures that all major subdirectories and the root directory exist, creating any missing directories."""

  ensure_directory_exists(self.processed_data_path)
  ensure_directory_exists(self.camera_data_path)
@@ -641,239 +334,9 @@ class ProcessedData:
  ensure_directory_exists(self.job_logs_path)
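
A minimal sketch of how the new processing marker files on ProcessedData might be checked, reusing the hypothetical session_root from the RawData sketch above (the import location is an assumption):

    from sl_shared_assets import ProcessedData  # assumed export location

    processed_data = ProcessedData()
    processed_data.resolve_paths(root_directory_path=session_root.joinpath("processed_data"))
    # Each .bin marker is created only after its pipeline finishes successfully, so existence
    # checks can be used to decide which pipelines still need to run for this session.
    if not processed_data.single_day_suite2p_bin_path.exists():
        print("single-day suite2p processing has not completed for this session")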
642
335
 
643
336
 
644
- @dataclass()
645
- class VRPCPersistentData:
646
- """Stores the paths to the directories and files that make up the 'persistent_data' directory on the VRPC.
647
-
648
- Persistent data directories are only used during data acquisition. Therefore, unlike most other directories, they
649
- are purposefully designed for specific PCs that participate in data acquisition. This section manages the
650
- animal-specific persistent_data directory stored on the VRPC.
651
-
652
- VRPC persistent data directory is used to preserve configuration data, such as the positions of Zaber motors and
653
- Meososcope objective, so that they can be reused across sessions of the same animals. The data in this directory
654
- is read at the beginning of each session and replaced at the end of each session.
655
- """
656
-
657
- persistent_data_path: Path = Path()
658
- """Stores the path to the project and animal specific 'persistent_data' directory to which the managed session
659
- belongs, relative to the VRPC root. This directory is exclusively used on the VRPC."""
660
- zaber_positions_path: Path = Path()
661
- """Stores the path to the Zaber motor positions snapshot generated at the end of the previous session runtime. This
662
- is used to automatically restore all Zaber motors to the same position across all sessions."""
663
- mesoscope_positions_path: Path = Path()
664
- """Stores the path to the Mesoscope positions snapshot generated at the end of the previous session runtime. This
665
- is used to help the user to (manually) restore the Mesoscope to the same position across all sessions."""
666
-
667
- def resolve_paths(self, root_directory_path: Path) -> None:
668
- """Resolves all paths managed by the class instance based on the input root directory path.
669
-
670
- This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
671
- machine that instantiates the class.
672
-
673
- Args:
674
- root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
675
- hierarchy, this has to point to a directory under the main /session, /animal, or /project directory of
676
- the managed session.
677
- """
678
-
679
- # Generates the managed paths
680
- self.persistent_data_path = root_directory_path
681
- self.zaber_positions_path = self.persistent_data_path.joinpath("zaber_positions.yaml")
682
- self.mesoscope_positions_path = self.persistent_data_path.joinpath("mesoscope_positions.yaml")
683
-
684
- def make_directories(self) -> None:
685
- """Ensures that all major subdirectories and the root directory exist."""
686
-
687
- ensure_directory_exists(self.persistent_data_path)
688
-
689
-
690
- @dataclass()
691
- class ScanImagePCPersistentData:
692
- """Stores the paths to the directories and files that make up the 'persistent_data' directory on the ScanImagePC.
693
-
694
- Persistent data directories are only used during data acquisition. Therefore, unlike most other directories, they
695
- are purposefully designed for specific PCs that participate in data acquisition. This section manages the
696
- animal-specific persistent_data directory stored on the ScanImagePC (Mesoscope PC).
697
-
698
- ScanImagePC persistent data directory is used to preserve the motion estimation snapshot, generated during the first
699
- experiment session. This is necessary to align the brain recording field of view across sessions. In turn, this
700
- is used to carry out 'online' motion and z-drift correction, improving the accuracy of across-day (multi-day)
701
- cell tracking.
702
- """
703
-
704
- persistent_data_path: Path = Path()
705
- """Stores the path to the project and animal specific 'persistent_data' directory to which the managed session
706
- belongs, relative to the ScanImagePC root. This directory is exclusively used on the ScanImagePC (Mesoscope PC)."""
707
- motion_estimator_path: Path = Path()
708
- """Stores the 'reference' motion estimator file generated during the first experiment session of each animal. This
709
- file is kept on the ScanImagePC to image the same population of cells across all experiment sessions."""
710
-
711
- def resolve_paths(self, root_directory_path: Path) -> None:
712
- """Resolves all paths managed by the class instance based on the input root directory path.
713
-
714
- This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
715
- machine that instantiates the class.
716
-
717
- Args:
718
- root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
719
- hierarchy, this has to point to a directory under the main /session, /animal, or /project directory of
720
- the managed session.
721
- """
722
-
723
- # Generates the managed paths
724
- self.persistent_data_path = root_directory_path
725
- self.motion_estimator_path = self.persistent_data_path.joinpath("MotionEstimator.me")
726
-
727
- def make_directories(self) -> None:
728
- """Ensures that all major subdirectories and the root directory exist."""
729
-
730
- ensure_directory_exists(self.persistent_data_path)
731
-
732
-
733
- @dataclass()
734
- class MesoscopeData:
735
- """Stores the paths to the directories and files that make up the 'meso_data' directory on the ScanImagePC.
736
-
737
- The meso_data directory is the root directory where all mesoscope-generated data is stored on the ScanImagePC. The
738
- path to this directory should be given relative to the VRPC root and be mounted to the VRPC filesystem via the
739
- SMB or equivalent protocol.
740
-
741
- During runtime, the ScanImagePC should organize all collected data under this root directory. During preprocessing,
742
- the VRPC uses SMB to access the data in this directory and merge it into the 'raw_data' session directory. The paths
743
- in this section, therefore, are specific to the VRPC and are not used on other PCs.
744
- """
745
-
746
- meso_data_path: Path = Path()
747
- """Stores the path to the root ScanImagePC data directory, mounted to the VRPC filesystem via the SMB or equivalent
748
- protocol. All mesoscope-generated data is stored under this root directory before it is merged into the VRPC-managed
749
- raw_data directory of each session."""
750
- mesoscope_data_path: Path = Path()
751
- """Stores the path to the 'default' mesoscope_data directory. All experiment sessions across all animals and
752
- projects use the same mesoscope_data directory to save the data generated by the mesoscope via ScanImage
753
- software. This simplifies ScanImagePC configuration process during runtime, as all data is always saved in the same
754
- directory. During preprocessing, the data is moved from the default directory first into a session-specific
755
- ScanImagePC directory and then into the VRPC raw_data session directory."""
756
- session_specific_path: Path = Path()
757
- """Stores the path to the session-specific data directory. This directory is generated at the end of each experiment
758
- runtime to prepare mesoscope data for being moved to the VRPC-managed raw_data directory and to reset the 'default'
759
- mesoscope_data directory for the next session's runtime."""
760
- ubiquitin_path: Path = Path()
761
- """Stores the path to the 'ubiquitin.bin' file. This file is automatically generated inside the session-specific
762
- data directory after its contents are safely transferred to the VRPC as part of preprocessing. During redundant data
763
- removal step of preprocessing, the VRPC searches for directories marked with ubiquitin.bin and deletes them from the
764
- ScanImagePC filesystem."""
765
-
766
- def resolve_paths(self, root_mesoscope_path: Path, session_name: str) -> None:
767
- """Resolves all paths managed by the class instance based on the input root directory path.
768
-
769
- This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
770
- machine that instantiates the class.
771
-
772
- Args:
773
- root_mesoscope_path: The path to the top-level directory of the ScanImagePC data hierarchy mounted to the
774
- VRPC via the SMB or equivalent protocol.
775
- session_name: The name of the session for which this subclass is initialized.
776
- """
777
-
778
- # Generates the managed paths
779
- self.meso_data_path = root_mesoscope_path
780
- self.session_specific_path = self.meso_data_path.joinpath(session_name)
781
- self.ubiquitin_path = self.session_specific_path.joinpath("ubiquitin.bin")
782
- self.mesoscope_data_path = self.meso_data_path.joinpath("mesoscope_data")
783
-
784
- def make_directories(self) -> None:
785
- """Ensures that all major subdirectories and the root directory exist."""
786
-
787
- ensure_directory_exists(self.meso_data_path)
788
-
789
-
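For orientation, the hierarchy managed by the removed MesoscopeData section above can be reconstructed with plain pathlib. This is a hedged sketch only: the mount point and session name are hypothetical examples, and the variable names simply mirror the fields resolved by resolve_paths() as shown above.

from pathlib import Path

# Hypothetical VRPC mount point for the ScanImagePC 'meso_data' share and an example session ID.
root = Path("/mnt/scanimage/meso_data")
session_name = "2025-01-01-10-00-00-000000"

# Mirrors what MesoscopeData.resolve_paths() produced for each field:
meso_data_path = root                                      # root of all mesoscope-generated data
mesoscope_data_path = root / "mesoscope_data"              # shared ScanImage output directory
session_specific_path = root / session_name                # per-session staging directory
ubiquitin_path = session_specific_path / "ubiquitin.bin"   # deletion marker written after transfer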
790
- @dataclass()
791
- class VRPCDestinations:
792
- """Stores the paths to the VRPC filesystem-mounted directories of the Synology NAS and BioHPC server.
793
-
794
- The paths from this section are primarily used to transfer preprocessed data to the long-term storage destinations.
795
- Additionally, they allow the VRPC to interface with the configuration directory of the BioHPC server to start data
796
- processing jobs and to read the data from the processed_data directory to remove redundant data from the VRPC
797
- filesystem.
798
-
799
- Overall, this section is intended solely for the VRPC and should not be used on other PCs.
800
- """
801
-
802
- nas_raw_data_path: Path = Path()
803
- """Stores the path to the session's raw_data directory on the Synology NAS, which is mounted to the VRPC via the
804
- SMB or equivalent protocol."""
805
- server_raw_data_path: Path = Path()
806
- """Stores the path to the session's raw_data directory on the BioHPC server, which is mounted to the VRPC via the
807
- SMB or equivalent protocol."""
808
- server_processed_data_path: Path = Path()
809
- """Stores the path to the session's processed_data directory on the BioHPC server, which is mounted to the VRPC via
810
- the SMB or equivalent protocol."""
811
- server_configuration_path: Path = Path()
812
- """Stores the path to the project-specific 'configuration' directory on the BioHPC server, which is mounted to the
813
- VRPC via the SMB or equivalent protocol."""
814
- telomere_path: Path = Path()
815
- """Stores the path to the session's telomere.bin marker. This marker is generated as part of data processing on the
816
- BioHPC server to notify the VRPC that the server received preprocessed data intact. The presence of this marker is
817
- used by the VRPC to determine which locally stored raw_data is safe to delete from the filesystem."""
818
- suite2p_configuration_path: Path = Path()
819
- """Stores the path to the suite2p_configuration.yaml file stored inside the project's 'configuration' directory on
820
- the BioHPC server. This configuration file specifies the parameters for the 'single day' sl-suite2p registration
821
- pipeline, which is applied to each session that generates brain activity data."""
822
- processing_tracker_path: Path = Path()
823
- """Stores the path to the processing_tracker.yaml file stored inside the sessions' root processed_data directory on
824
- the BioHPC server. This file tracks which processing pipelines need to be applied to the target session and the status
825
- (success / failure) of each applied pipeline.
826
- """
827
- multiday_configuration_path: Path = Path()
828
- """Stores the path to the multiday_configuration.yaml file stored inside the project's 'configuration' directory
829
- on the BioHPC server. This configuration file specifies the parameters for the 'multiday' sl-suite2p registration
830
- pipeline used to track brain cells across multiple sessions."""
831
-
832
- def resolve_paths(
833
- self,
834
- nas_raw_data_path: Path,
835
- server_raw_data_path: Path,
836
- server_processed_data_path: Path,
837
- server_configuration_path: Path,
838
- ) -> None:
839
- """Resolves all paths managed by the class instance based on the input root directory paths.
840
-
841
- This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
842
- machine that instantiates the class.
843
-
844
- Args:
845
- nas_raw_data_path: The path to the session's raw_data directory on the Synology NAS, relative to the VRPC
846
- filesystem root.
847
- server_raw_data_path: The path to the session's raw_data directory on the BioHPC server, relative to the
848
- VRPC filesystem root.
849
- server_processed_data_path: The path to the session's processed_data directory on the BioHPC server,
850
- relative to the VRPC filesystem root.
851
- server_configuration_path: The path to the project-specific 'configuration' directory on the BioHPC server,
852
- relative to the VRPC filesystem root.
853
- """
854
-
855
- # Generates the managed paths
856
- self.nas_raw_data_path = nas_raw_data_path
857
- self.server_raw_data_path = server_raw_data_path
858
- self.server_processed_data_path = server_processed_data_path
859
- self.server_configuration_path = server_configuration_path
860
- self.telomere_path = self.server_raw_data_path.joinpath("telomere.bin")
861
- self.suite2p_configuration_path = self.server_configuration_path.joinpath("suite2p_configuration.yaml")
862
- self.processing_tracker_path = self.server_processed_data_path.joinpath("processing_tracker.yaml")
863
- self.multiday_configuration_path = self.server_configuration_path.joinpath("multiday_configuration.yaml")
864
-
865
- def make_directories(self) -> None:
866
- """Ensures that all major subdirectories and the root directory exist."""
867
- ensure_directory_exists(self.nas_raw_data_path)
868
- ensure_directory_exists(self.server_raw_data_path)
869
- ensure_directory_exists(self.server_configuration_path)
870
- ensure_directory_exists(self.server_processed_data_path)
871
-
872
-
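Similarly, the destination paths derived by the removed VRPCDestinations section can be sketched directly. The mount points below are hypothetical examples, not paths used by the library; only the derived file names come from the resolve_paths() logic shown above.

from pathlib import Path

# Hypothetical VRPC mount points for the Synology NAS and BioHPC server shares.
nas_raw_data_path = Path("/mnt/nas/project/animal/session/raw_data")
server_raw_data_path = Path("/mnt/server/project/animal/session/raw_data")
server_processed_data_path = Path("/mnt/server_workdir/project/animal/session/processed_data")
server_configuration_path = Path("/mnt/server/project/configuration")

# Mirrors what VRPCDestinations.resolve_paths() derived from those roots:
telomere_path = server_raw_data_path / "telomere.bin"
suite2p_configuration_path = server_configuration_path / "suite2p_configuration.yaml"
processing_tracker_path = server_processed_data_path / "processing_tracker.yaml"
multiday_configuration_path = server_configuration_path / "multiday_configuration.yaml"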
873
337
  @dataclass
874
338
  class SessionData(YamlConfig):
875
- """Stores and manages the data layout of a single training or experiment session acquired using the Sun lab
876
- Mesoscope-VR system.
339
+ """Stores and manages the data layout of a single training or experiment session acquired in the Sun lab.
877
340
 
878
341
  The primary purpose of this class is to maintain the session data structure across all supported destinations and
879
342
  during all processing stages. It generates the paths used by all other classes from all Sun lab libraries that
@@ -886,11 +349,6 @@ class SessionData(YamlConfig):
886
349
  session order.
887
350
 
888
351
  Notes:
889
- If this class is instantiated on the VRPC, it is expected that the BioHPC server, Synology NAS, and ScanImagePC
890
- data directories are mounted on the local filesystem via the SMB or equivalent protocol. All manipulations
891
- with these destinations are carried out with the assumption that the local OS has full access to these
892
- directories and filesystems.
893
-
894
352
  This class is specifically designed for working with the data from a single session, performed by a single
895
353
  animal under the specific experiment. The class is used to manage both raw and processed data. It follows the
896
354
  data through acquisition, preprocessing and processing stages of the Sun lab data workflow. Together with
@@ -906,8 +364,11 @@ class SessionData(YamlConfig):
906
364
  """Stores the name (timestamp-based ID) of the managed session."""
907
365
  session_type: str
908
366
  """Stores the type of the session. Primarily, this determines how to read the session_descriptor.yaml file. Has
909
- to be set to one of the four supported types: 'Lick training', 'Run training', 'Window checking' or 'Experiment'.
367
+ to be set to one of the supported types: 'lick training', 'run training', 'window checking' or
368
+ 'mesoscope experiment'.
910
369
  """
370
+ acquisition_system: str
371
+ """Stores the name of the data acquisition and runtime management system that acquired the data."""
911
372
  experiment_name: str | None
912
373
  """Stores the name of the experiment configuration file. If the session_type field is set to 'Experiment' and this
913
374
  field is not None (null), it communicates the specific experiment configuration used by the session. During runtime,
@@ -919,59 +380,37 @@ class SessionData(YamlConfig):
919
380
  processed_data: ProcessedData = field(default_factory=lambda: ProcessedData())
920
381
  """Stores the paths to all subfolders and files found under the /project/animal/session/processed_data directory of
921
382
  any PC used to work with Sun lab data."""
922
- deeplabcut_data: DeepLabCutData = field(default_factory=lambda: DeepLabCutData())
923
- """Stores the paths to all subfolders and files found under the /project/deeplabcut directory of any PC used to
924
- work with Sun lab data."""
925
- configuration_data: ConfigurationData = field(default_factory=lambda: ConfigurationData())
926
- """Stores the paths to all subfolders and files found under the /project/configuration directory of any PC used to
927
- work with Sun lab data."""
928
- vrpc_persistent_data: VRPCPersistentData = field(default_factory=lambda: VRPCPersistentData())
929
- """Stores the paths to all subfolders and files found under the /project/animal/persistent_data directory of
930
- the VRPC used in the Sun lab to acquire behavior data."""
931
- scanimagepc_persistent_data: ScanImagePCPersistentData = field(default_factory=lambda: ScanImagePCPersistentData())
932
- """Stores the paths to all subfolders and files found under the /project/animal/persistent_data directory of
933
- the ScanImagePC used in the Sun lab to acquire brain activity data."""
934
- mesoscope_data: MesoscopeData = field(default_factory=lambda: MesoscopeData())
935
- """Stores the paths to all subfolders and files found under the /meso_data (root mesoscope data) directory of
936
- the ScanImagePC used in the Sun lab to acquire brain activity data."""
937
- destinations: VRPCDestinations = field(default_factory=lambda: VRPCDestinations())
938
- """Stores the paths to all subfolders and files under various VRPC-filesystem-mounted directories of other machines
939
- used in the Sun lab for long-term data storage."""
940
383
 
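After this change, a SessionData instance only carries the identity fields plus the raw_data and processed_data path sections. A hedged illustration of the reduced field set follows; direct construction is shown only for clarity (the intended entry points are the create() and load() classmethods below), the import path is assumed, and all values are hypothetical.

from sl_shared_assets import SessionData  # assumed import path

session = SessionData(
    project_name="example_project",
    animal_id="A001",
    session_name="2025-01-01-10-00-00-000000",
    session_type="lick training",
    acquisition_system="mesoscope-vr",  # assumed system name, for illustration only
    experiment_name=None,
    # raw_data and processed_data fall back to their default factories.
)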
941
384
  @classmethod
942
385
  def create(
943
386
  cls,
387
+ project_name: str,
944
388
  animal_id: str,
945
389
  session_type: str,
946
- project_configuration: ProjectConfiguration,
947
390
  experiment_name: str | None = None,
948
391
  session_name: str | None = None,
949
392
  ) -> "SessionData":
950
- """Creates a new SessionData object and generates the new session's data structure.
393
+ """Creates a new SessionData object and generates the new session's data structure on the local PC.
951
394
 
952
- This method is called by sl-experiment runtimes that create new training or experiment sessions to generate the
953
- session data directory tree. It always assumes it is called on the VRPC and, as part of its runtime, resolves
954
- and generates the necessary local and ScanImagePC directories to support acquiring and preprocessing session's
955
- data.
395
+ This method is intended to be called exclusively by the sl-experiment library to create new training or
396
+ experiment sessions and generate the session data directory tree.
956
397
 
957
398
  Notes:
958
399
  To load an already existing session data structure, use the load() method instead.
959
400
 
960
401
  This method automatically dumps the data of the created SessionData instance into the session_data.yaml file
961
402
  inside the root raw_data directory of the created hierarchy. It also finds and dumps other configuration
962
- files, such as project_configuration.yaml and experiment_configuration.yaml, into the same raw_data
963
- directory. This ensures that if the session's runtime is interrupted unexpectedly, the acquired data can
964
- still be processed.
403
+ files, such as project_configuration.yaml, experiment_configuration.yaml, and system_configuration.yaml, into
404
+ the same raw_data directory. This ensures that if the session's runtime is interrupted unexpectedly, the
405
+ acquired data can still be processed.
965
406
 
966
407
  Args:
408
+ project_name: The name of the project for which the data is acquired.
967
409
  animal_id: The ID code of the animal for which the data is acquired.
968
410
  session_type: The type of the session. Primarily, this determines how to read the session_descriptor.yaml
969
411
  file. Valid options are 'Lick training', 'Run training', 'Window checking', or 'Experiment'.
970
412
  experiment_name: The name of the experiment executed during managed session. This optional argument is only
971
413
  used for 'Experiment' session types. It is used to find the experiment configuration .YAML file.
972
- project_configuration: The initialized ProjectConfiguration instance that stores the session's project
973
- configuration data. This is used to determine the root directory paths for all lab machines used during
974
- data acquisition and processing.
975
414
  session_name: An optional session_name override. Generally, this argument should not be provided for most
976
415
  sessions. When provided, the method uses this name instead of generating a new timestamp-based name.
977
416
  This is only used during the 'ascension' runtime to convert old data structures to the modern
@@ -981,30 +420,30 @@ class SessionData(YamlConfig):
981
420
  An initialized SessionData instance that stores the layout of the newly created session's data.
982
421
  """
983
422
 
984
- # Acquires the UTC timestamp to use as the session name
423
+ if session_type.lower() not in _valid_session_types:
424
+ message = (
425
+ f"Invalid session type '{session_type.lower()}' encountered when creating a new SessionData instance. "
426
+ f"Use one of the supported session types: {_valid_session_types}"
427
+ )
428
+ console.error(message=message, error=ValueError)
429
+
430
+ # Acquires the UTC timestamp to use as the session name, unless a name override is provided
985
431
  if session_name is None:
986
432
  session_name = str(get_timestamp(time_separator="-"))
987
433
 
988
- # Extracts the root directory paths stored inside the project configuration file. These roots are then used to
989
- # initialize this class instance.
990
- vrpc_root = Path(project_configuration.local_root_directory)
991
- mesoscope_root = Path(project_configuration.local_mesoscope_directory)
992
- biohpc_root = Path(project_configuration.local_server_directory)
993
- biohpc_workdir = Path(project_configuration.local_server_working_directory)
994
- nas_root = Path(project_configuration.local_nas_directory)
434
+ # Resolves the acquisition system configuration. This queries the acquisition system configuration data used
435
+ # by the machine (PC) that calls this method.
436
+ acquisition_system = get_system_configuration_data()
995
437
 
996
- # Extracts the name of the project stored inside the project configuration file.
997
- project_name = project_configuration.project_name
998
-
999
- # Constructs the session directory path
1000
- session_path = vrpc_root.joinpath(project_name, animal_id, session_name)
438
+ # Constructs the root session directory path
439
+ session_path = acquisition_system.paths.root_directory.joinpath(project_name, animal_id, session_name)
1001
440
 
1002
441
  # Handles potential session name conflicts
1003
442
  counter = 0
1004
443
  while session_path.exists():
1005
444
  counter += 1
1006
445
  new_session_name = f"{session_name}_{counter}"
1007
- session_path = vrpc_root.joinpath(project_name, animal_id, new_session_name)
446
+ session_path = acquisition_system.paths.root_directory.joinpath(project_name, animal_id, new_session_name)
1008
447
 
1009
448
  # If a conflict is detected and resolved, warns the user about the resolved conflict.
1010
449
  if counter > 0:
@@ -1016,93 +455,50 @@ class SessionData(YamlConfig):
1016
455
  )
1017
456
  console.echo(message=message, level=LogLevel.ERROR)
1018
457
 
1019
- # Generates subclasses stored inside the main class instance based on the data resolved above. Note; most fields
1020
- # of these classes are resolved automatically, based on one or more 'root' paths provided to the 'resolve_paths'
1021
- # method.
458
+ # Generates subclasses stored inside the main class instance based on the data resolved above.
1022
459
  raw_data = RawData()
1023
460
  raw_data.resolve_paths(root_directory_path=session_path.joinpath("raw_data"))
1024
- raw_data.make_directories() # Generates the local directory tree
461
+ raw_data.make_directories() # Generates the local 'raw_data' directory tree
1025
462
 
463
+ # Resolves, but does not make processed_data directories. All runtimes that require access to 'processed_data'
464
+ # are configured to generate those directories if necessary, so there is no need to make them here.
1026
465
  processed_data = ProcessedData()
1027
466
  processed_data.resolve_paths(root_directory_path=session_path.joinpath("processed_data"))
1028
- processed_data.make_directories()
1029
-
1030
- dlc_data = DeepLabCutData()
1031
- dlc_data.resolve_paths(root_directory_path=vrpc_root.joinpath(project_name, "deeplabcut"))
1032
- dlc_data.make_directories()
1033
-
1034
- configuration_data = ConfigurationData()
1035
- configuration_data.resolve_paths(
1036
- root_directory_path=vrpc_root.joinpath(project_name, "configuration"),
1037
- experiment_name=experiment_name,
1038
- )
1039
- configuration_data.make_directories()
1040
-
1041
- vrpc_persistent_data = VRPCPersistentData()
1042
- vrpc_persistent_path = vrpc_root.joinpath(project_name, animal_id, "persistent_data")
1043
- vrpc_persistent_data.resolve_paths(root_directory_path=vrpc_persistent_path)
1044
- vrpc_persistent_data.make_directories()
1045
-
1046
- scanimagepc_persistent_data = ScanImagePCPersistentData()
1047
- scanimagepc_persistent_path = mesoscope_root.joinpath(project_name, animal_id, "persistent_data")
1048
- scanimagepc_persistent_data.resolve_paths(root_directory_path=scanimagepc_persistent_path)
1049
- scanimagepc_persistent_data.make_directories()
1050
-
1051
- mesoscope_data = MesoscopeData()
1052
- mesoscope_data.resolve_paths(root_mesoscope_path=mesoscope_root, session_name=session_name)
1053
- mesoscope_data.make_directories()
1054
-
1055
- destinations = VRPCDestinations()
1056
- destinations.resolve_paths(
1057
- nas_raw_data_path=nas_root.joinpath(project_name, animal_id, session_name, "raw_data"),
1058
- server_raw_data_path=biohpc_root.joinpath(project_name, animal_id, session_name, "raw_data"),
1059
- server_configuration_path=biohpc_root.joinpath(project_name, "configuration"),
1060
- server_processed_data_path=biohpc_workdir.joinpath(project_name, "processed_data"),
1061
- )
1062
- destinations.make_directories()
1063
467
 
1064
468
  # Packages the sections generated above into a SessionData instance
1065
469
  instance = SessionData(
1066
- project_name=project_configuration.project_name,
470
+ project_name=project_name,
1067
471
  animal_id=animal_id,
1068
472
  session_name=session_name,
1069
- session_type=session_type,
473
+ session_type=session_type.lower(),
474
+ acquisition_system=acquisition_system.name,
1070
475
  raw_data=raw_data,
1071
- deeplabcut_data=dlc_data,
1072
- configuration_data=configuration_data,
1073
476
  processed_data=processed_data,
1074
- vrpc_persistent_data=vrpc_persistent_data,
1075
- scanimagepc_persistent_data=scanimagepc_persistent_data,
1076
- mesoscope_data=mesoscope_data,
1077
- destinations=destinations,
1078
477
  experiment_name=experiment_name,
1079
478
  )
1080
479
 
1081
480
  # Saves the configured instance data to the session's folder, so that it can be reused during processing or
1082
- # preprocessing
481
+ # preprocessing.
1083
482
  instance._save()
1084
483
 
1085
- # Extracts and saves the necessary configuration classes to the session raw_data folder. Note, this list of
1086
- # classes is not exhaustive. More classes are saved as part of the session runtime management class start() and
1087
- # __init__() method runtimes:
484
+ # Also saves the ProjectConfiguration, SystemConfiguration, and ExperimentConfiguration instances to the same
485
+ # folder using the paths resolved for the RawData instance above.
1088
486
 
1089
- # Discovers and saves the necessary configuration class instances to the raw_data and the processed_data folders
1090
- # of the managed session:
1091
- # Project Configuration
1092
- sh.copy2(
1093
- src=instance.configuration_data.project_configuration_path,
1094
- dst=instance.raw_data.project_configuration_path,
487
+ # Copies the project_configuration.yaml file to the session's folder
488
+ project_configuration_path = acquisition_system.paths.root_directory.joinpath(
489
+ project_name, "configuration", "project_configuration.yaml"
1095
490
  )
1096
- sh.copy2(
1097
- src=instance.configuration_data.project_configuration_path,
1098
- dst=instance.processed_data.project_configuration_path,
1099
- ) # ProjectConfiguration and SessionData are saved to both raw and processed data folders.
1100
- # Experiment Configuration, if the session type is Experiment.
491
+ sh.copy2(project_configuration_path, instance.raw_data.project_configuration_path)
492
+
493
+ # Dumps the acquisition system's configuration data to the session's folder
494
+ acquisition_system.save(path=instance.raw_data.system_configuration_path)
495
+
1101
496
  if experiment_name is not None:
1102
- sh.copy2(
1103
- src=instance.configuration_data.experiment_configuration_path,
1104
- dst=instance.raw_data.experiment_configuration_path,
497
+ # Copies the experiment_configuration.yaml file to the session's folder
498
+ experiment_configuration_path = acquisition_system.paths.root_directory.joinpath(
499
+ project_name, "configuration", f"{experiment_name}.yaml"
1105
500
  )
501
+ sh.copy2(experiment_configuration_path, instance.raw_data.experiment_configuration_path)
1106
502
 
1107
503
  # Returns the initialized SessionData instance to caller
1108
504
  return instance
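A minimal usage sketch of the new create() signature shown above, assuming SessionData is importable from the package root and that the calling machine already has an acquisition system configuration set up; the project and animal names are hypothetical.

from sl_shared_assets import SessionData  # assumed import path

# Builds the local raw_data hierarchy for a new training session. The root directory
# comes from the acquisition system configuration stored on this machine.
session = SessionData.create(
    project_name="example_project",
    animal_id="A001",
    session_type="lick training",
)
print(session.raw_data.session_data_path)  # path to the dumped session_data.yaml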
@@ -1111,38 +507,34 @@ class SessionData(YamlConfig):
1111
507
  def load(
1112
508
  cls,
1113
509
  session_path: Path,
1114
- on_server: bool,
1115
- make_directories: bool = True,
510
+ processed_data_root: Path | None = None,
511
+ make_processed_data_directory: bool = False,
1116
512
  ) -> "SessionData":
1117
513
  """Loads the SessionData instance from the target session's session_data.yaml file.
1118
514
 
1119
515
  This method is used to load the data layout information of an already existing session. Primarily, this is used
1120
- when preprocessing or processing session data. Depending on the call location (machine), the method
1121
- automatically resolves all necessary paths and creates the necessary directories.
516
+ when preprocessing or processing session data. Due to how SessionData is stored and used in the lab, this
517
+ method always loads the data layout from the session_data.yaml file stored inside the raw_data session
518
+ subfolder. Currently, all interactions with Sun lab data require access to the 'raw_data' folder.
1122
519
 
1123
520
  Notes:
1124
521
  To create a new session, use the create() method instead.
1125
522
 
1126
- Although session_data.yaml is stored both inside raw_data and processed_data subfolders, this method
1127
- always searches only inside the raw_data folder. Storing session data in both folders is only used to ensure
1128
- human experimenters can always trace all data in the lab back to the proper project, animal, and session.
1129
-
1130
523
  Args:
1131
- session_path: The path to the root directory of an existing session, e.g.: vrpc_root/project/animal/session.
1132
- on_server: Determines whether the method is used to initialize an existing session on the BioHPC server or
1133
- a non-server machine. Note, non-server runtimes use the same 'root' directory to store raw_data and
1134
- processed_data subfolders. BioHPC server runtimes use different volumes (drives) to store these
1135
- subfolders.
1136
- make_directories: Determines whether to attempt creating any missing directories. Generally, this option
1137
- is safe to be True for all destinations other than some specific BioHPC server runtimes, where some
1138
- data is 'owned' by a general lab account and not the user account. These cases are only present for the
1139
- sl-forgery library and are resolved by that library.
524
+ session_path: The path to the root directory of an existing session, e.g.: root/project/animal/session.
525
+ processed_data_root: If processed data is kept on a drive different from the one that stores raw data,
526
+ provide the path to the root project directory (directory that stores all Sun lab projects) on that
527
+ drive. The method will automatically resolve the project/animal/session/processed_data hierarchy using
528
+ this root path. If raw and processed data are kept on the same drive, keep this set to None.
529
+ make_processed_data_directory: Determines whether this method should create the processed_data directory if it
530
+ does not exist.
1140
531
 
1141
532
  Returns:
1142
533
  An initialized SessionData instance for the session whose data is stored at the provided path.
1143
534
 
1144
535
  Raises:
1145
536
  FileNotFoundError: If the 'session_data.yaml' file is not found under the session_path/raw_data/ subfolder.
537
+
1146
538
  """
1147
539
  # To properly initialize the SessionData instance, the provided path should contain the raw_data directory
1148
540
  # with session_data.yaml file.
@@ -1157,7 +549,7 @@ class SessionData(YamlConfig):
1157
549
  console.error(message=message, error=FileNotFoundError)
1158
550
 
1159
551
  # Loads class data from .yaml file
1160
- instance: SessionData = cls._safe_load(path=session_data_path)
552
+ instance: SessionData = cls.from_yaml(file_path=session_data_path) # type: ignore
1161
553
 
1162
554
  # The method assumes that the 'donor' .yaml file is always stored inside the raw_data directory of the session
1163
555
  # to be processed. Since the directory itself might have moved (between or even within the same PC) relative to
@@ -1169,109 +561,26 @@ class SessionData(YamlConfig):
1169
561
  new_root = local_root.joinpath(instance.project_name, instance.animal_id, instance.session_name, "raw_data")
1170
562
  instance.raw_data.resolve_paths(root_directory_path=new_root)
1171
563
 
1172
- # Uses the adjusted raw_data section to load the ProjectConfiguration instance. This is used below to resolve
1173
- # all other SessionData sections, as it stores various required root directories.
1174
- project_configuration: ProjectConfiguration = ProjectConfiguration.load(
1175
- project_name=instance.project_name,
1176
- configuration_path=Path(instance.raw_data.project_configuration_path),
1177
- )
1178
-
1179
- # Resolves the new roots for all sections that use the same root as the raw_data directory:
1180
-
1181
- # CONFIGURATION
1182
- new_root = local_root.joinpath(instance.project_name, "configuration")
1183
- instance.configuration_data.resolve_paths(
1184
- root_directory_path=new_root,
1185
- experiment_name=instance.experiment_name,
1186
- )
1187
-
1188
- # DEEPLABCUT
1189
- new_root = local_root.joinpath(instance.project_name, "deeplabcut")
1190
- instance.deeplabcut_data.resolve_paths(root_directory_path=new_root)
1191
-
1192
- # Resolves the roots for all VRPC-specific sections that use the data from the ProjectConfiguration instance:
1193
-
1194
- # VRPC PERSISTENT DATA
1195
- new_root = Path(project_configuration.local_root_directory).joinpath(
1196
- instance.project_name, instance.animal_id, "persistent_data"
1197
- )
1198
- instance.vrpc_persistent_data.resolve_paths(root_directory_path=new_root)
1199
-
1200
- # SCANIMAGEPC PERSISTENT DATA
1201
- new_root = Path(project_configuration.local_mesoscope_directory).joinpath(
1202
- instance.project_name, instance.animal_id, "persistent_data"
1203
- )
1204
- instance.scanimagepc_persistent_data.resolve_paths(root_directory_path=new_root)
1205
-
1206
- # MESOSCOPE DATA
1207
- instance.mesoscope_data.resolve_paths(
1208
- root_mesoscope_path=Path(project_configuration.local_mesoscope_directory),
1209
- session_name=instance.session_name,
1210
- )
1211
-
1212
- # DESTINATIONS
1213
- instance.destinations.resolve_paths(
1214
- nas_raw_data_path=Path(project_configuration.local_nas_directory).joinpath(
1215
- instance.project_name, instance.animal_id, instance.session_name, "raw_data"
1216
- ),
1217
- server_raw_data_path=Path(project_configuration.local_server_directory).joinpath(
1218
- instance.project_name, instance.animal_id, instance.session_name, "raw_data"
1219
- ),
1220
- server_configuration_path=Path(project_configuration.local_server_directory).joinpath(
1221
- instance.project_name, "configuration"
1222
- ),
1223
- server_processed_data_path=Path(project_configuration.local_server_working_directory).joinpath(
1224
- instance.project_name, instance.animal_id, instance.session_name, "processed_data"
1225
- ),
1226
- )
1227
-
1228
- # Resolves the paths to the processed_data directories. The resolution strategy depends on whether the method is
1229
- # called on the VRPC (locally) or the BioHPC server (remotely).
1230
- if not on_server:
1231
- # Local runtimes use the same root session directory for both raw_data and processed_data. This stems from
1232
- # the assumption that most local machines in the lab only use NVME (fast) volumes and, therefore, do not
1233
- # need to separate 'storage' and 'working' data directories.
1234
- new_root = local_root # Reuses the local root for non-server runtimes
1235
-
1236
- else:
1237
- # The BioHPC server stores raw_data on slow volume and processed_data on fast (NVME) volume. Therefore, to
1238
- # configure processed_data paths, the method first needs to load the fast volume root path from the
1239
- # project_configuration.yaml file stored in the raw_data folder.
1240
- new_root = Path(project_configuration.remote_working_directory)
564
+ # Unless a different root is provided for processed data, it uses the same root as raw_data.
565
+ if processed_data_root is None:
566
+ processed_data_root = new_root
1241
567
 
1242
568
  # Regenerates the processed_data path depending on the root resolution above
1243
569
  instance.processed_data.resolve_paths(
1244
- root_directory_path=new_root.joinpath(
570
+ root_directory_path=processed_data_root.joinpath(
1245
571
  instance.project_name, instance.animal_id, instance.session_name, "processed_data"
1246
572
  )
1247
573
  )
1248
574
 
1249
- # Generates data directory hierarchies that may be missing on the local machine
1250
- if make_directories:
1251
- instance.raw_data.make_directories()
1252
- instance.configuration_data.make_directories()
1253
- instance.deeplabcut_data.make_directories()
1254
- instance.processed_data.make_directories()
575
+ # Generates processed data directories if requested and necessary
576
+ if make_processed_data_directory:
1255
577
  instance.processed_data.make_directories()
1256
578
 
1257
- # Ensures that project configuration and session data classes are present in both raw_data and
1258
- # processed_data directories. This ensures that all data of the session can always be traced to the parent
1259
- # project, animal, and session.
1260
- sh.copy2(
1261
- src=instance.raw_data.session_data_path,
1262
- dst=instance.processed_data.session_data_path,
1263
- )
1264
- sh.copy2(
1265
- src=instance.raw_data.project_configuration_path,
1266
- dst=instance.processed_data.project_configuration_path,
1267
- )
1268
-
1269
579
  # Returns the initialized SessionData instance to caller
1270
580
  return instance
1271
581
 
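A matching sketch for the updated load() signature; the session path and the optional fast-volume root below are hypothetical, and the import path is assumed.

from pathlib import Path

from sl_shared_assets import SessionData  # assumed import path

# Restores the layout of an existing session from its raw_data/session_data.yaml file.
session = SessionData.load(
    session_path=Path("/data/example_project/A001/2025-01-01-10-00-00-000000"),
    processed_data_root=Path("/fast_volume"),  # omit (None) if raw and processed data share a drive
    make_processed_data_directory=True,
)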
1272
582
  def _save(self) -> None:
1273
- """Saves the instance data to the 'raw_data' directory and the 'processed_data' directory of the managed session
1274
- as a 'session_data.yaml' file.
583
+ """Saves the instance data to the 'raw_data' directory of the managed session as a 'session_data.yaml' file.
1275
584
 
1276
585
  This is used to save the data stored in the instance to disk, so that it can be reused during preprocessing or
1277
586
  data processing. The method is intended to only be used by the SessionData instance itself during its
@@ -1287,61 +596,6 @@ class SessionData(YamlConfig):
1287
596
  # SessionData instance from being loaded from disk.
1288
597
  origin.raw_data = None # type: ignore
1289
598
  origin.processed_data = None # type: ignore
1290
- origin.configuration_data = None # type: ignore
1291
- origin.deeplabcut_data = None # type: ignore
1292
- origin.vrpc_persistent_data = None # type: ignore
1293
- origin.scanimagepc_persistent_data = None # type: ignore
1294
- origin.mesoscope_data = None # type: ignore
1295
- origin.destinations = None # type: ignore
1296
599
 
1297
600
  # Saves instance data as a .YAML file
1298
601
  origin.to_yaml(file_path=self.raw_data.session_data_path)
1299
- origin.to_yaml(file_path=self.processed_data.session_data_path)
1300
-
1301
- @classmethod
1302
- def _safe_load(cls, path: Path) -> "SessionData":
1303
- """Loads a SessionData class instance into memory in a way that avoids collisions with outdated SessionData
1304
- formats.
1305
-
1306
- This method is used instead of the default method inherited from the YamlConfig class. Primarily, this is used
1307
- to avoid errors with old SessionData class formats that contain some data that is either no longer present or
1308
- cannot be loaded from YAML. Using this custom method ensures we can load any SessionData class, provided it
1309
- contains the required header fields.
1310
-
1311
- Returns:
1312
- The SessionData instance initialized using the resolved header data.
1313
- """
1314
-
1315
- # Reads the file content without using the YAML parsing methods.
1316
- with open(path, "r") as f:
1317
- content = f.read()
1318
-
1319
- # Extracts the necessary fields using regex
1320
- fields_to_keep = {}
1321
-
1322
- # Defines the field patterns for each field to extract
1323
- patterns = {
1324
- "project_name": r"project_name:\s*(.+?)(?=\n\w|\n$)",
1325
- "animal_id": r"animal_id:\s*(.+?)(?=\n\w|\n$)",
1326
- "session_name": r"session_name:\s*(.+?)(?=\n\w|\n$)",
1327
- "session_type": r"session_type:\s*(.+?)(?=\n\w|\n$)",
1328
- "experiment_name": r"experiment_name:\s*(.+?)(?=\n\w|\n$)",
1329
- }
1330
-
1331
- # Extracts each field
1332
- for key, pattern in patterns.items():
1333
- match = re.search(pattern, content)
1334
- if match:
1335
- fields_to_keep[key] = match.group(1).strip()
1336
- # Solves a bug with how animal_id field is stored, where it contains both sets of quotes. May be helpful
1337
- # to solve potential future issues with other fields too
1338
- fields_to_keep[key] = fields_to_keep[key].replace("'", "")
1339
- else:
1340
- if key == "experiment_name":
1341
- fields_to_keep[key] = "null" # Default for experiment_name
1342
- else:
1343
- fields_to_keep[key] = "" # Default for other fields
1344
-
1345
- # Returns the data to caller
1346
- # noinspection PyTypeChecker
1347
- return dacite.from_dict(data_class=cls, data=fields_to_keep)