sl-shared-assets 2.0.0__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic. Click here for more details.

Files changed (32) hide show
  1. sl_shared_assets/__init__.py +17 -9
  2. sl_shared_assets/__init__.pyi +12 -8
  3. sl_shared_assets/cli.py +266 -20
  4. sl_shared_assets/cli.pyi +46 -5
  5. sl_shared_assets/data_classes/__init__.py +8 -3
  6. sl_shared_assets/data_classes/__init__.pyi +8 -4
  7. sl_shared_assets/data_classes/configuration_data.py +149 -30
  8. sl_shared_assets/data_classes/configuration_data.pyi +49 -11
  9. sl_shared_assets/data_classes/runtime_data.py +70 -49
  10. sl_shared_assets/data_classes/runtime_data.pyi +41 -33
  11. sl_shared_assets/data_classes/session_data.py +193 -253
  12. sl_shared_assets/data_classes/session_data.pyi +99 -116
  13. sl_shared_assets/data_classes/surgery_data.py +1 -1
  14. sl_shared_assets/server/__init__.py +2 -2
  15. sl_shared_assets/server/__init__.pyi +5 -2
  16. sl_shared_assets/server/job.py +229 -1
  17. sl_shared_assets/server/job.pyi +111 -0
  18. sl_shared_assets/server/server.py +431 -31
  19. sl_shared_assets/server/server.pyi +158 -15
  20. sl_shared_assets/tools/__init__.py +2 -1
  21. sl_shared_assets/tools/__init__.pyi +2 -0
  22. sl_shared_assets/tools/ascension_tools.py +9 -21
  23. sl_shared_assets/tools/ascension_tools.pyi +1 -1
  24. sl_shared_assets/tools/packaging_tools.py +2 -2
  25. sl_shared_assets/tools/project_management_tools.py +147 -41
  26. sl_shared_assets/tools/project_management_tools.pyi +45 -6
  27. {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/METADATA +127 -13
  28. sl_shared_assets-3.0.0.dist-info/RECORD +36 -0
  29. {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/entry_points.txt +2 -0
  30. sl_shared_assets-2.0.0.dist-info/RECORD +0 -36
  31. {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/WHEEL +0 -0
  32. {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/licenses/LICENSE +0 -0
@@ -5,8 +5,8 @@ classes, which are also stored as .yaml files inside each session's raw_data and
5
5
  these classes contain all necessary information to restore the data hierarchy on any machine. All other Sun lab
6
6
  libraries use these classes to work with all lab-generated data."""
7
7
 
8
- import re
9
8
  import copy
9
+ from enum import StrEnum
10
10
  import shutil as sh
11
11
  from pathlib import Path
12
12
  from dataclasses import field, dataclass
@@ -16,139 +16,43 @@ from ataraxis_base_utilities import LogLevel, console, ensure_directory_exists
16
16
  from ataraxis_data_structures import YamlConfig
17
17
  from ataraxis_time.time_helpers import get_timestamp
18
18
 
19
- from .configuration_data import get_system_configuration_data
19
+ from .configuration_data import AcquisitionSystems, get_system_configuration_data
20
20
 
21
- # Stores all supported input for SessionData class 'session_type' fields.
22
- _valid_session_types = {"lick training", "run training", "mesoscope experiment", "window checking"}
23
21
 
22
+ class SessionTypes(StrEnum):
23
+ """Defines the set of data acquisition session types supported by various data acquisition systems used in the
24
+ Sun lab.
24
25
 
25
- @dataclass()
26
- class VersionData(YamlConfig):
27
- """Stores information about the versions of important Sun lab libraries used to acquire the session's data."""
28
-
29
- python_version: str = ""
30
- """Stores the Python version used by the environment that acquired the data."""
31
- sl_experiment_version: str = ""
32
- """Stores the version of the sl-experiment library that was used to acquire the data."""
33
-
34
-
35
- @dataclass()
36
- class ProjectConfiguration(YamlConfig):
37
- """Stores the project-specific configuration parameters that do not change between different animals and runtime
38
- sessions.
39
-
40
- An instance of this class is generated and saved as a .yaml file in the 'configuration' directory of each project
41
- when it is created. After that, the stored data is reused for every runtime (training or experiment session) carried
42
- out for each animal of the project. Additionally, a copy of the most actual configuration file is saved inside each
43
- runtime session's 'raw_data' folder, providing seamless integration between the managed data and various Sun lab
44
- (sl-) libraries.
26
+ A data acquisition session broadly encompasses a recording session carried out to either: acquire experiment data,
27
+ train the animal for the upcoming experiments, or to assess the quality of surgical or other pre-experiment
28
+ intervention.
45
29
 
46
30
  Notes:
47
- Together with SessionData, this class forms the entry point for all interactions with the data acquired in the
48
- Sun lab. The fields of this class are used to flexibly configure the runtime behavior of major data acquisition
49
- (sl-experiment) and processing (sl-forgery) libraries, adapting them for any project in the lab.
50
- """
51
-
52
- project_name: str = ""
53
- """Stores the descriptive name of the project. This name is used to create the root directory for the project and
54
- to initialize SessionData instances each time any Sun lab library interacts with the session's data."""
55
- surgery_sheet_id: str = ""
56
- """The ID of the Google Sheet file that stores information about surgical interventions performed on all animals
57
- participating in the managed project. This log sheet is used to parse and write the surgical intervention data for
58
- each animal into every runtime session raw_data folder, so that the surgery data is always kept together with the
59
- rest of the training and experiment data."""
60
- water_log_sheet_id: str = ""
61
- """The ID of the Google Sheet file that stores information about water restriction (and behavior tracker)
62
- information for all animals participating in the managed project. This is used to synchronize the information
63
- inside the water restriction log with the state of the animal at the end of each training or experiment session.
31
+ This enumeration does not differentiate between different acquisition systems. Different acquisition systems
32
+ support different session types, and may not be suited for acquiring some of the session types listed in this
33
+ enumeration.
64
34
  """
65
35
 
66
- @classmethod
67
- def load(cls, configuration_path: Path) -> "ProjectConfiguration":
68
- """Loads the project configuration parameters from the specified project_configuration.yaml file.
69
-
70
- This method is called during each interaction with any runtime session's data, including the creation of a new
71
- session.
72
-
73
- Args:
74
- configuration_path: The path to the project_configuration.yaml file from which to load the data.
75
-
76
- Returns:
77
- The initialized ProjectConfiguration instance that stores the configuration data for the target project.
78
-
79
- Raise:
80
- FileNotFoundError: If the specified configuration file does not exist or is not a valid YAML file.
81
- """
82
-
83
- # Prevents loading non-existent files.
84
- if configuration_path.suffix != ".yaml" or not configuration_path.exists():
85
- message = (
86
- f"Unable to load the project configuration data from the specified path: {configuration_path}. Valid "
87
- f"configuration file paths should use the '.yaml' extension and point to an existing file."
88
- )
89
- console.error(message=message, error=FileNotFoundError)
90
-
91
- # Loads the data from the YAML file and initializes the class instance.
92
- instance: ProjectConfiguration = cls.from_yaml(file_path=configuration_path) # type: ignore
93
-
94
- # Verifies the loaded data. Most importantly, this step does not allow proceeding if the user did not
95
- # replace the surgery log and water restriction log placeholders with valid ID values.
96
- instance._verify_data()
97
-
98
- # Returns the initialized class instance to caller
99
- return instance
100
-
101
- def save(self, path: Path) -> None:
102
- """Saves class instance data to disk as a project_configuration.yaml file.
103
-
104
- This method is automatically called from the 'sl_experiment' library when a new project is created. After this
105
- method's runtime, all future project initialization calls will use the load() method to reuse configuration data
106
- saved to the .yaml file created by this method.
107
-
108
- Args:
109
- path: The path to the .yaml file to save the data to.
110
- """
111
-
112
- # Saves the data to the YAML file
113
- self.to_yaml(file_path=path)
114
-
115
- def _verify_data(self) -> None:
116
- """Verifies the user-modified data loaded from the project_configuration.yaml file.
117
-
118
- Since this class is explicitly designed to be modified by the user, this verification step is carried out to
119
- ensure that the loaded data matches expectations. This reduces the potential for user errors to impact the
120
- runtime behavior of the libraries using this class. This internal method is automatically called by the load()
121
- method.
122
-
123
- Raises:
124
- ValueError: If the loaded data does not match expected formats or values.
125
- """
126
-
127
- # Verifies Google Sheet ID formatting. Google Sheet IDs are usually 44 characters long, containing letters,
128
- # numbers, hyphens, and underscores
129
- pattern = r"^[a-zA-Z0-9_-]{44}$"
130
- if not re.match(pattern, self.surgery_sheet_id):
131
- message = (
132
- f"Unable to verify the surgery_sheet_id field loaded from the 'project_configuration.yaml' file. "
133
- f"Expected a string with 44 characters, using letters, numbers, hyphens, and underscores, but found: "
134
- f"{self.surgery_sheet_id}."
135
- )
136
- console.error(message=message, error=ValueError)
137
- if not re.match(pattern, self.water_log_sheet_id):
138
- message = (
139
- f"Unable to verify the surgery_sheet_id field loaded from the 'project_configuration.yaml' file. "
140
- f"Expected a string with 44 characters, using letters, numbers, hyphens, and underscores, but found: "
141
- f"{self.water_log_sheet_id}."
142
- )
143
- console.error(message=message, error=ValueError)
36
+ LICK_TRAINING = "lick training"
37
+ """Mesoscope-VR session designed to teach animals to use the water delivery port while being head-fixed."""
38
+ RUN_TRAINING = "run training"
39
+ """Mesoscope-VR session designed to teach animals how to run on the treadmill while being head-fixed."""
40
+ MESOSCOPE_EXPERIMENT = "mesoscope experiment"
41
+ """Mesoscope-VR experiment session. The session uses Unity game engine to run experiments in virtual reality task
42
+ environments and collects brain activity data using Mesoscope."""
43
+ WINDOW_CHECKING = "window checking"
44
+ """A special Mesoscope-VR session designed to evaluate the suitability of the given animal to be included in the
45
+ experiment dataset. Specifically, the session involves using the Mesoscope to check the quality of the cell
46
+ activity data."""
144
47
 
145
48
 
146
49
  @dataclass()
147
50
  class RawData:
148
51
  """Stores the paths to the directories and files that make up the 'raw_data' session-specific directory.
149
52
 
150
- The raw_data directory stores the data acquired during the session runtime before and after preprocessing. Since
151
- preprocessing does not alter the data, any data in that folder is considered 'raw'.
53
+ The raw_data directory stores the data acquired during the session data acquisition runtime, before and after
54
+ preprocessing. Since preprocessing does not irreversibly alter the data, any data in that folder is considered
55
+ 'raw,' even if preprocessing losslessly re-compresses the data for efficient transfer.
152
56
 
153
57
  Notes:
154
58
  Sun lab data management strategy primarily relies on keeping multiple redundant copies of the raw_data for
@@ -165,38 +69,34 @@ class RawData:
165
69
  includes .mp4 video files from each recorded camera."""
166
70
  mesoscope_data_path: Path = Path()
167
71
  """Stores the path to the directory that contains all Mesoscope data acquired during the session. Primarily, this
168
- includes the mesoscope-acquired .tiff files (brain activity data) and the motion estimation data. This directory is
169
- created for all sessions, but is only used (filled) by the sessions that use the Mesoscope-VR system to acquire
170
- brain activity data."""
72
+ includes the mesoscope-acquired .tiff files (brain activity data) and the MotionEstimator.me file (motion
73
+ estimation data). This directory is created for all sessions, but is only used (filled) by the sessions that use
74
+ the Mesoscope-VR system to acquire brain activity data."""
171
75
  behavior_data_path: Path = Path()
172
76
  """Stores the path to the directory that contains all non-video behavior data acquired during the session.
173
77
  Primarily, this includes the .npz log files that store serialized data acquired by all hardware components of the
174
- data acquisition system other than cameras and brain activity data acquisition devices (such as the Mesoscope).
175
- The reason why the directory is called 'behavior' is primarily because all .npz files are parsed to infer the
176
- behavior of the animal, in contrast to brain (cell) activity data."""
78
+ data acquisition system other than cameras and brain activity data acquisition devices (such as the Mesoscope)."""
177
79
  zaber_positions_path: Path = Path()
178
80
  """Stores the path to the zaber_positions.yaml file. This file contains the snapshot of all Zaber motor positions
179
- at the end of the session. Zaber motors are used to position the LickPort and the HeadBar manipulators, which is
180
- essential for supporting proper brain imaging and animal's running behavior during the session. This file is only
181
- created for sessions that use the Mesoscope-VR system."""
81
+ at the end of the session. Zaber motors are used to position the LickPort, HeadBar, and Wheel Mesoscope-VR modules
82
+ to support proper brain activity recording and behavior during the session. This file is only created for sessions
83
+ that use the Mesoscope-VR system."""
182
84
  session_descriptor_path: Path = Path()
183
- """Stores the path to the session_descriptor.yaml file. This file is partially filled by the system during runtime
184
- and partially by the experimenter after the runtime. It contains session-specific information, such as the specific
185
- task parameters and the notes made by the experimenter during runtime."""
85
+ """Stores the path to the session_descriptor.yaml file. This file is filled jointly by the data acquisition system
86
+ and the experimenter. It contains session-specific information, such as the specific task parameters and the notes
87
+ made by the experimenter during runtime. Each supported session type uses a unique SessionDescriptor class to define
88
+ the format and content of the session_descriptor.yaml file."""
186
89
  hardware_state_path: Path = Path()
187
90
  """Stores the path to the hardware_state.yaml file. This file contains the partial snapshot of the calibration
188
- parameters used by the data acquisition and runtime management system modules during the session. Primarily,
189
- this is used during data processing to read the .npz data log files generated during runtime."""
91
+ parameters used by the data acquisition system modules during the session. Primarily, it is used during data
92
+ processing to interpret the raw data stored inside .npz log files."""
190
93
  surgery_metadata_path: Path = Path()
191
94
  """Stores the path to the surgery_metadata.yaml file. This file contains the most up-to-date information about the
192
95
  surgical intervention(s) performed on the animal prior to the session."""
193
- project_configuration_path: Path = Path()
194
- """Stores the path to the project_configuration.yaml file. This file contains the snapshot of the configuration
195
- parameters for the session's project."""
196
96
  session_data_path: Path = Path()
197
97
  """Stores the path to the session_data.yaml file. This path is used by the SessionData instance to save itself to
198
- disk as a .yaml file. The file contains the paths to all raw and processed data directories used during data
199
- acquisition or processing runtime."""
98
+ disk as a .yaml file. In turn, the cached data is reused to reinstate the same data hierarchy across all supported
99
+ destinations, enabling various libraries to interface with the session data."""
200
100
  experiment_configuration_path: Path = Path()
201
101
  """Stores the path to the experiment_configuration.yaml file. This file contains the snapshot of the
202
102
  experiment runtime configuration used by the session. This file is only created for experiment sessions."""
@@ -206,13 +106,13 @@ class RawData:
206
106
  the 'virtual' tip, tilt, and fastZ positions set via ScanImage software. This file is only created for sessions that
207
107
  use the Mesoscope-VR system to acquire brain activity data."""
208
108
  window_screenshot_path: Path = Path()
209
- """Stores the path to the .png screenshot of the ScanImagePC screen. The screenshot should contain the image of the
210
- cranial window and the red-dot alignment windows. This is used to generate a visual snapshot of the cranial window
211
- alignment and appearance for each experiment session. This file is only created for sessions that use the
212
- Mesoscope-VR system to acquire brain activity data."""
109
+ """Stores the path to the .png screenshot of the ScanImagePC screen. At a minimum, the screenshot should contain the
110
+ image of the imaging plane and the red-dot alignment window. This is used to generate a visual snapshot of the
111
+ cranial window alignment and cell appearance for each experiment session. This file is only created for sessions
112
+ that use the Mesoscope-VR system to acquire brain activity data."""
213
113
  system_configuration_path: Path = Path()
214
114
  """Stores the path to the system_configuration.yaml file. This file contains the exact snapshot of the data
215
- acquisition and runtime management system configuration parameters used to acquire session data."""
115
+ acquisition system configuration parameters used to acquire session data."""
216
116
  checksum_path: Path = Path()
217
117
  """Stores the path to the ax_checksum.txt file. This file is generated as part of packaging the data for
218
118
  transmission and stores the xxHash-128 checksum of the data. It is used to verify that the transmission did not
@@ -223,29 +123,30 @@ class RawData:
223
123
  telomere.bin file are considered 'incomplete' and are excluded from all automated processing, as they may contain
224
124
  corrupted, incomplete, or otherwise unusable data."""
225
125
  ubiquitin_path: Path = Path()
226
- """Stores the path to the ubiquitin.bin file. This file is primarily used by the sl-experiment libraries to mark
126
+ """Stores the path to the ubiquitin.bin file. This file is primarily used by the sl-experiment library to mark
227
127
  local session data directories for deletion (purging). Typically, it is created once the data is safely moved to
228
128
  the long-term storage destinations (NAS and Server) and the integrity of the moved data is verified on at least one
229
- destination. During 'purge' sl-experiment runtimes, the library discovers and removes all session data marked with
230
- 'ubiquitin.bin' files from the machine that runs the code."""
129
+ destination. During 'sl-purge' sl-experiment runtimes, the library discovers and removes all session data marked
130
+ with 'ubiquitin.bin' files from the machine that runs the command."""
131
+ nk_path: Path = Path()
132
+ """Stores the path to the nk.bin file. This file is used by the sl-experiment library to mark sessions undergoing
133
+ runtime initialization. Since runtime initialization is a complex process that may encounter a runtime error, the
134
+ marker is used to discover sessions that failed to initialize. Since uninitialized sessions by definition do not
135
+ contain any valuable data, they are marked for immediate deletion from all managed destinations."""
231
136
  integrity_verification_tracker_path: Path = Path()
232
137
  """Stores the path to the integrity_verification.yaml tracker file. This file stores the current state of the data
233
138
  integrity verification pipeline. It prevents more than one instance of the pipeline from working with the data
234
139
  at a given time and communicates the outcome (success or failure) of the most recent pipeline runtime."""
235
- version_data_path: Path = Path()
236
- """Stores the path to the version_data.yaml file. This file contains the snapshot of Python and sl-experiment
237
- library versions that were used when the data was acquired."""
238
140
 
239
141
  def resolve_paths(self, root_directory_path: Path) -> None:
240
142
  """Resolves all paths managed by the class instance based on the input root directory path.
241
143
 
242
- This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
243
- machine that instantiates the class.
144
+ This method is called each time the (wrapper) SessionData class is instantiated to regenerate the managed path
145
+ hierarchy on any machine that instantiates the class.
244
146
 
245
147
  Args:
246
- root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
247
- hierarchy, this has to point to a directory under the main /session, /animal, or /project directory of
248
- the managed session.
148
+ root_directory_path: The path to the top-level directory of the session. Typically, this path is assembled
149
+ using the following hierarchy: root/project/animal/session_id
249
150
  """
250
151
 
251
152
  # Generates the managed paths
@@ -257,7 +158,6 @@ class RawData:
257
158
  self.session_descriptor_path = self.raw_data_path.joinpath("session_descriptor.yaml")
258
159
  self.hardware_state_path = self.raw_data_path.joinpath("hardware_state.yaml")
259
160
  self.surgery_metadata_path = self.raw_data_path.joinpath("surgery_metadata.yaml")
260
- self.project_configuration_path = self.raw_data_path.joinpath("project_configuration.yaml")
261
161
  self.session_data_path = self.raw_data_path.joinpath("session_data.yaml")
262
162
  self.experiment_configuration_path = self.raw_data_path.joinpath("experiment_configuration.yaml")
263
163
  self.mesoscope_positions_path = self.raw_data_path.joinpath("mesoscope_positions.yaml")
@@ -266,11 +166,15 @@ class RawData:
266
166
  self.system_configuration_path = self.raw_data_path.joinpath("system_configuration.yaml")
267
167
  self.telomere_path = self.raw_data_path.joinpath("telomere.bin")
268
168
  self.ubiquitin_path = self.raw_data_path.joinpath("ubiquitin.bin")
169
+ self.nk_path = self.raw_data_path.joinpath("nk.bin")
269
170
  self.integrity_verification_tracker_path = self.raw_data_path.joinpath("integrity_verification_tracker.yaml")
270
- self.version_data_path = self.raw_data_path.joinpath("version_data.yaml")
271
171
 
272
172
  def make_directories(self) -> None:
273
- """Ensures that all major subdirectories and the root directory exist, creating any missing directories."""
173
+ """Ensures that all major subdirectories and the root directory exist, creating any missing directories.
174
+
175
+ This method is called each time the (wrapper) SessionData class is instantiated and allowed to generate
176
+ missing data directories.
177
+ """
274
178
  ensure_directory_exists(self.raw_data_path)
275
179
  ensure_directory_exists(self.camera_data_path)
276
180
  ensure_directory_exists(self.mesoscope_data_path)
@@ -287,113 +191,113 @@ class ProcessedData:
287
191
  """
288
192
 
289
193
  processed_data_path: Path = Path()
290
- """Stores the path to the root processed_data directory of the session. This directory stores the processed data
291
- as it is generated by various data processing pipelines."""
194
+ """Stores the path to the root processed_data directory of the session. This directory stores the processed session
195
+ data, generated from raw_data directory contents by various data processing pipelines."""
292
196
  camera_data_path: Path = Path()
293
- """Stores the path to the directory that contains video tracking data generated by our DeepLabCut-based video
294
- processing pipelines."""
197
+ """Stores the path to the directory that contains video tracking data generated by the Sun lab DeepLabCut-based
198
+ video processing pipeline(s)."""
295
199
  mesoscope_data_path: Path = Path()
296
- """Stores path to the directory that contains processed brain activity (cell) data generated by our suite2p-based
297
- photometry processing pipelines (single-day and multi-day). This directory is only used by sessions acquired with
298
- the Mesoscope-VR system. For all other sessions, it will be created, but kept empty."""
200
+ """Stores the path to the directory that contains processed brain activity (cell) data generated by sl-suite2p
201
+ processing pipelines (single-day and multi-day). This directory is only used by sessions acquired with
202
+ the Mesoscope-VR system."""
299
203
  behavior_data_path: Path = Path()
300
204
  """Stores the path to the directory that contains the non-video and non-brain-activity data extracted from
301
- .npz log files by our in-house log parsing pipeline."""
302
- job_logs_path: Path = Path()
303
- """Stores the path to the directory that stores the standard output and standard error data collected during
304
- server-side data processing pipeline runtimes. This directory is primarily used when running data processing jobs
305
- on the remote server. However, it is possible to configure local runtimes to also redirect log data to files
306
- stored in this directory (by editing ataraxis-base-utilities 'console' variable)."""
205
+ .npz log files by the sl-behavior log processing pipeline."""
307
206
  suite2p_processing_tracker_path: Path = Path()
308
- """Stores the path to the suite2p_processing_tracker.yaml tracker file. This file stores the current state of the
309
- sl-suite2p single-day data processing pipeline."""
310
- dataset_formation_tracker_path: Path = Path()
311
- """Same as suite2p_processing_tracker_path, but stores the current state of the dataset formation process that
312
- includes this session (communicates whether the session has been successfully added to any dataset(s))."""
207
+ """Stores the path to the suite2p_processing_tracker.yaml tracker file. This file stores the current state of
208
+ processing the session with the sl-suite2p single-day pipeline."""
313
209
  behavior_processing_tracker_path: Path = Path()
314
- """Stores the path to the behavior_processing_tracker.yaml file. This file stores the current state of the
315
- behavior (log) data processing pipeline."""
210
+ """Stores the path to the behavior_processing_tracker.yaml file. This file stores the current state of processing
211
+ the session with the sl-behavior log-parsing pipeline."""
316
212
  video_processing_tracker_path: Path = Path()
317
- """Stores the path to the video_processing_tracker.yaml file. This file stores the current state of the video
318
- tracking (DeepLabCut) processing pipeline."""
213
+ """Stores the path to the video_processing_tracker.yaml file. This file stores the current state of processing
214
+ the session with the DeepLabCut-based video processing pipeline."""
215
+ p53_path: Path = Path()
216
+ """Stores the path to the p53.bin file. This file serves as a lock-in marker that determines whether the session is
217
+ in the processing or dataset state. Specifically, if the file does not exist, the session data cannot be integrated
218
+ into any dataset, as it may be actively worked on by processing pipelines. Conversely, if the marker exists,
219
+ processing pipelines are not allowed to work with the session, as it may be actively integrated into one or more
220
+ datasets."""
319
221
 
320
222
  def resolve_paths(self, root_directory_path: Path) -> None:
321
223
  """Resolves all paths managed by the class instance based on the input root directory path.
322
224
 
323
- This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
324
- machine that instantiates the class.
225
+ This method is called each time the (wrapper) SessionData class is instantiated to regenerate the managed path
226
+ hierarchy on any machine that instantiates the class.
325
227
 
326
228
  Args:
327
- root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
328
- hierarchy, this has to point to a directory under the main /session, /animal, or /project directory of
329
- the managed session.
229
+ root_directory_path: The path to the top-level directory of the session. Typically, this path is assembled
230
+ using the following hierarchy: root/project/animal/session_id
330
231
  """
331
232
  # Generates the managed paths
332
233
  self.processed_data_path = root_directory_path
333
234
  self.camera_data_path = self.processed_data_path.joinpath("camera_data")
334
235
  self.mesoscope_data_path = self.processed_data_path.joinpath("mesoscope_data")
335
236
  self.behavior_data_path = self.processed_data_path.joinpath("behavior_data")
336
- self.job_logs_path = self.processed_data_path.joinpath("job_logs")
337
237
  self.suite2p_processing_tracker_path = self.processed_data_path.joinpath("suite2p_processing_tracker.yaml")
338
- self.dataset_formation_tracker_path = self.processed_data_path.joinpath("dataset_formation_tracker.yaml")
339
238
  self.behavior_processing_tracker_path = self.processed_data_path.joinpath("behavior_processing_tracker.yaml")
340
239
  self.video_processing_tracker_path = self.processed_data_path.joinpath("video_processing_tracker.yaml")
240
+ self.p53_path = self.processed_data_path.joinpath("p53.bin")
341
241
 
342
242
  def make_directories(self) -> None:
343
- """Ensures that all major subdirectories and the root directory exist, creating any missing directories."""
243
+ """Ensures that all major subdirectories and the root directory exist, creating any missing directories.
244
+
245
+ This method is called each time the (wrapper) SessionData class is instantiated and allowed to generate
246
+ missing data directories.
247
+ """
344
248
 
345
249
  ensure_directory_exists(self.processed_data_path)
346
250
  ensure_directory_exists(self.camera_data_path)
347
251
  ensure_directory_exists(self.behavior_data_path)
348
- ensure_directory_exists(self.job_logs_path)
349
252
 
350
253
 
351
254
  @dataclass
352
255
  class SessionData(YamlConfig):
353
- """Stores and manages the data layout of a single training or experiment session acquired in the Sun lab.
354
-
355
- The primary purpose of this class is to maintain the session data structure across all supported destinations and
356
- during all processing stages. It generates the paths used by all other classes from all Sun lab libraries that
357
- interact with the session's data from the point of its creation and until the data is integrated into an
358
- analysis dataset.
256
+ """Stores and manages the data layout of a single Sun lab data acquisition session.
359
257
 
360
- When necessary, the class can be used to either generate a new session or load the layout of an already existing
361
- session. When the class is used to create a new session, it generates the new session's name using the current
362
- UTC timestamp, accurate to microseconds. This ensures that each session name is unique and preserves the overall
363
- session order.
258
+ The primary purpose of this class is to maintain the session data structure across all supported destinations and to
259
+ provide a unified data access interface shared by all Sun lab libraries. The class can be used to either generate a
260
+ new session or load the layout of an already existing session. When the class is used to create a new session, it
261
+ generates the new session's name using the current UTC timestamp, accurate to microseconds. This ensures that each
262
+ session 'name' is unique and preserves the overall session order.
364
263
 
365
264
  Notes:
366
265
  This class is specifically designed for working with the data from a single session, performed by a single
367
266
  animal under the specific experiment. The class is used to manage both raw and processed data. It follows the
368
- data through acquisition, preprocessing and processing stages of the Sun lab data workflow. Together with
369
- ProjectConfiguration class, this class serves as an entry point for all interactions with the managed session's
370
- data.
267
+ data through acquisition, preprocessing and processing stages of the Sun lab data workflow. This class serves as
268
+ an entry point for all interactions with the managed session's data.
371
269
  """
372
270
 
373
271
  project_name: str
374
- """Stores the name of the managed session's project."""
272
+ """Stores the name of the project for which the session was acquired."""
375
273
  animal_id: str
376
- """Stores the unique identifier of the animal that participates in the managed session."""
274
+ """Stores the unique identifier of the animal that participates in the session."""
377
275
  session_name: str
378
- """Stores the name (timestamp-based ID) of the managed session."""
379
- session_type: str
380
- """Stores the type of the session. Primarily, this determines how to read the session_descriptor.yaml file. Has
381
- to be set to one of the supported types: 'lick training', 'run training', 'window checking' or
382
- 'mesoscope experiment'.
276
+ """Stores the name (timestamp-based ID) of the session."""
277
+ session_type: str | SessionTypes
278
+ """Stores the type of the session. Has to be set to one of the supported session types, defined in the SessionTypes
279
+ enumeration exposed by the sl-shared-assets library.
383
280
  """
384
- acquisition_system: str
385
- """Stores the name of the data acquisition and runtime management system that acquired the data."""
281
+ acquisition_system: str | AcquisitionSystems
282
+ """Stores the name of the data acquisition system that acquired the data. Has to be set to one of the supported
283
+ acquisition systems, defined in the AcquisitionSystems enumeration exposed by the sl-shared-assets library."""
386
284
  experiment_name: str | None
387
- """Stores the name of the experiment configuration file. If the session_type field is set to 'Experiment' and this
388
- field is not None (null), it communicates the specific experiment configuration used by the session. During runtime,
389
- the name stored here is used to load the specific experiment configuration data stored in a .yaml file with the
390
- same name. If the session is not an experiment session, this field is ignored."""
285
+ """Stores the name of the experiment performed during the session. If the session_type field indicates that the
286
+ session is an experiment, this field communicates the specific experiment configuration used by the session. During
287
+ runtime, this name is used to load the specific experiment configuration data stored in a .yaml file with the same
288
+ name. If the session is not an experiment session, this field should be left as Null (None)."""
289
+ python_version: str = "3.11.13"
290
+ """Stores the Python version that was used to acquire session data."""
291
+ sl_experiment_version: str = "3.0.0"
292
+ """Stores the version of the sl-experiment library that was used to acquire the session data."""
391
293
  raw_data: RawData = field(default_factory=lambda: RawData())
392
- """Stores the paths to all subfolders and files found under the /project/animal/session/raw_data directory of any
393
- PC used to work with Sun lab data."""
294
+ """Stores absolute paths to all directories and files that jointly make the session's raw data hierarchy. This
295
+ directory structure is resolved for each machine that creates or loads the SessionData class to ensure that all
296
+ Sun lab data can be accessed via the same API on any destination."""
394
297
  processed_data: ProcessedData = field(default_factory=lambda: ProcessedData())
395
- """Stores the paths to all subfolders and files found under the /project/animal/session/processed_data directory of
396
- any PC used to work with Sun lab data."""
298
+ """Stores absolute paths to all directories and files that jointly make the session's processed data hierarchy.
299
+ Typically, this hierarchy is only used on the lab's processing server(s), but it can also be used to run local
300
+ testing on end-user machines."""
397
301
 
398
302
  def __post_init__(self) -> None:
399
303
  """Ensures raw_data and processed_data are always instances of RawData and ProcessedData."""
@@ -408,9 +312,11 @@ class SessionData(YamlConfig):
408
312
  cls,
409
313
  project_name: str,
410
314
  animal_id: str,
411
- session_type: str,
315
+ session_type: SessionTypes | str,
412
316
  experiment_name: str | None = None,
413
317
  session_name: str | None = None,
318
+ python_version: str = "3.11.13",
319
+ sl_experiment_version: str = "2.0.0",
414
320
  ) -> "SessionData":
415
321
  """Creates a new SessionData object and generates the new session's data structure on the local PC.
416
322
 
@@ -421,31 +327,37 @@ class SessionData(YamlConfig):
421
327
  To load an already existing session data structure, use the load() method instead.
422
328
 
423
329
  This method automatically dumps the data of the created SessionData instance into the session_data.yaml file
424
- inside the root raw_data directory of the created hierarchy. It also finds and dumps other configuration
425
- files, such as project_configuration.yaml, experiment_configuration.yaml, and system_configuration.yaml into
426
- the same raw_data directory. This ensures that if the session's runtime is interrupted unexpectedly, the
427
- acquired data can still be processed.
330
+ inside the root 'raw_data' directory of the created hierarchy. It also finds and dumps other configuration
331
+ files, such as experiment_configuration.yaml and system_configuration.yaml, into the same 'raw_data'
332
+ directory. If the session's runtime is interrupted unexpectedly, the acquired data can still be processed
333
+ using these pre-saved class instances.
428
334
 
429
335
  Args:
430
- project_name: The name of the project for which the data is acquired.
431
- animal_id: The ID code of the animal for which the data is acquired.
432
- session_type: The type of the session. Primarily, this determines how to read the session_descriptor.yaml
433
- file. Valid options are 'Lick training', 'Run training', 'Window checking', or 'Experiment'.
434
- experiment_name: The name of the experiment executed during managed session. This optional argument is only
435
- used for 'Experiment' session types. It is used to find the experiment configuration .YAML file.
436
- session_name: An optional session_name override. Generally, this argument should not be provided for most
336
+ project_name: The name of the project for which the session is carried out.
337
+ animal_id: The ID code of the animal participating in the session.
338
+ session_type: The type of the session. Has to be one of the supported session types exposed by the
339
+ SessionTypes enumeration.
340
+ experiment_name: The name of the experiment executed during the session. This optional argument is only
341
+ used for experiment sessions. Note! The name passed to this argument has to match the name of the
342
+ experiment configuration .yaml file.
343
+ session_name: An optional session name override. Generally, this argument should not be provided for most
437
344
  sessions. When provided, the method uses this name instead of generating a new timestamp-based name.
438
345
  This is only used during the 'ascension' runtime to convert old data structures to the modern
439
346
  lab standards.
347
+ python_version: The string that specifies the Python version used to collect session data. Has to be
348
+ specified using the major.minor.patch version format.
349
+ sl_experiment_version: The string that specifies the version of the sl-experiment library used to collect
350
+ session data. Has to be specified using the major.minor.patch version format.
440
351
 
441
352
  Returns:
442
353
  An initialized SessionData instance that stores the layout of the newly created session's data.
443
354
  """
444
355
 
445
- if session_type.lower() not in _valid_session_types:
356
+ # On Python 3.11, 'in' raises TypeError when a plain string is tested against an Enum class, so a tuple is used.
357
+ if session_type not in tuple(SessionTypes):
446
358
  message = (
447
- f"Invalid session type '{session_type.lower()}' encountered when creating a new SessionData instance. "
448
- f"Use one of the supported session types: {_valid_session_types}"
359
+ f"Invalid session type '{session_type}' encountered when creating a new SessionData instance. "
360
+ f"Use one of the supported session types from the SessionTypes enumeration."
449
361
  )
450
362
  console.error(message=message, error=ValueError)
451
363
 
@@ -504,25 +416,21 @@ class SessionData(YamlConfig):
504
416
  project_name=project_name,
505
417
  animal_id=animal_id,
506
418
  session_name=session_name,
507
- session_type=session_type.lower(),
419
+ session_type=session_type,
508
420
  acquisition_system=acquisition_system.name,
509
421
  raw_data=raw_data,
510
422
  processed_data=processed_data,
511
423
  experiment_name=experiment_name,
424
+ python_version=python_version,
425
+ sl_experiment_version=sl_experiment_version,
512
426
  )
513
427
 
514
428
  # Saves the configured instance data to the session's folder, so that it can be reused during processing or
515
429
  # preprocessing.
516
430
  instance._save()
517
431
 
518
- # Also saves the ProjectConfiguration, SystemConfiguration, and ExperimentConfiguration instances to the same
519
- # folder using the paths resolved for the RawData instance above.
520
-
521
- # Copies the project_configuration.yaml file to session's folder
522
- project_configuration_path = acquisition_system.paths.root_directory.joinpath(
523
- project_name, "configuration", "project_configuration.yaml"
524
- )
525
- sh.copy2(project_configuration_path, instance.raw_data.project_configuration_path)
432
+ # Also saves the SystemConfiguration and ExperimentConfiguration instances to the same folder using the paths
433
+ # resolved for the RawData instance above.
526
434
 
527
435
  # Dumps the acquisition system's configuration data to session's folder
528
436
  acquisition_system.save(path=instance.raw_data.system_configuration_path)
@@ -534,6 +442,11 @@ class SessionData(YamlConfig):
534
442
  )
535
443
  sh.copy2(experiment_configuration_path, instance.raw_data.experiment_configuration_path)
536
444
 
445
+ # All newly created sessions are marked with the 'nk.bin' file. If the marker is not removed during runtime,
446
+ # the session becomes a valid target for deletion (purging) runtimes operating from the main acquisition
447
+ # machine of any data acquisition system.
448
+ instance.raw_data.nk_path.touch()
449
+
537
450
  # Returns the initialized SessionData instance to caller
538
451
  return instance
539
452
 
@@ -547,9 +460,9 @@ class SessionData(YamlConfig):
547
460
  """Loads the SessionData instance from the target session's session_data.yaml file.
548
461
 
549
462
  This method is used to load the data layout information of an already existing session. Primarily, this is used
550
- when preprocessing or processing session data. Due to how SessionData is stored and used in the lab, this
551
- method always loads the data layout from the session_data.yaml file stored inside the raw_data session
552
- subfolder. Currently, all interactions with Sun lab data require access to the 'raw_data' folder.
463
+ when processing session data. Due to how SessionData is stored and used in the lab, this method always loads the
464
+ data layout from the session_data.yaml file stored inside the 'raw_data' session subfolder. Currently, all
465
+ interactions with Sun lab data require access to the 'raw_data' folder of each session.
553
466
 
554
467
  Notes:
555
468
  To create a new session, use the create() method instead.
@@ -613,10 +526,19 @@ class SessionData(YamlConfig):
613
526
  # Returns the initialized SessionData instance to caller
614
527
  return instance
615
528
 
529
+ def runtime_initialized(self) -> None:
530
+ """Ensures that the 'nk.bin' marker file is removed from the session's raw_data folder.
531
+
532
+ The 'nk.bin' marker is generated as part of the SessionData initialization (creation) process to mark sessions
533
+ that did not fully initialize during runtime. This service method is designed to be called by the sl-experiment
534
+ library classes to remove the 'nk.bin' marker when it is safe to do so. It should not be called by end-users.
535
+ """
536
+ self.raw_data.nk_path.unlink(missing_ok=True)
537
+
616
538
  def _save(self) -> None:
617
539
  """Saves the instance data to the 'raw_data' directory of the managed session as a 'session_data.yaml' file.
618
540
 
619
- This is used to save the data stored in the instance to disk, so that it can be reused during preprocessing or
541
+ This is used to save the data stored in the instance to disk, so that it can be reused during further stages of
620
542
  data processing. The method is intended to only be used by the SessionData instance itself during its
621
543
  create() method runtime.
622
544
  """
@@ -631,6 +553,10 @@ class SessionData(YamlConfig):
631
553
  origin.raw_data = None # type: ignore
632
554
  origin.processed_data = None # type: ignore
633
555
 
556
+ # Converts StringEnum instances to strings
557
+ origin.session_type = str(origin.session_type)
558
+ origin.acquisition_system = str(origin.acquisition_system)
559
+
634
560
  # Saves instance data as a .YAML file
635
561
  origin.to_yaml(file_path=self.raw_data.session_data_path)
636
562
 
@@ -667,6 +593,16 @@ class ProcessingTracker(YamlConfig):
667
593
  else:
668
594
  self._lock_path = ""
669
595
 
596
+ def __del__(self) -> None:
597
+ """If the instance is garbage-collected without calling the stop() method, assumes this is due to a runtime
598
+ error.
599
+
600
+ It is essential to always resolve the runtime as either 'stopped' or 'erred' to avoid deadlocking the session
601
+ data.
602
+ """
603
+ if self._is_running:
604
+ self.error()
605
+
670
606
  def _load_state(self) -> None:
671
607
  """Reads the current processing state from the wrapped .YAML file."""
672
608
  if self.file_path.exists():
@@ -777,7 +713,11 @@ class ProcessingTracker(YamlConfig):
777
713
  raise Timeout(message) # Fallback to appease mypy, should not be reachable
778
714
 
779
715
  def stop(self) -> None:
780
- """Mark processing as started.
716
+ """Configures the tracker file to indicate that the tracked processing runtime has been completed successfully.
717
+
718
+ After this method returns, it is UNSAFE to do any further processing from the process that calls this method.
719
+ Any process that calls the 'start' method of this class is expected to also call this method or the 'error' method
720
+ at the end of the runtime.
781
721
 
782
722
  Raises:
783
723
  TimeoutError: If the file lock for the target .YAML file cannot be acquired within the timeout period.
@@ -819,7 +759,7 @@ class ProcessingTracker(YamlConfig):
819
759
  @property
820
760
  def is_complete(self) -> bool:
821
761
  """Returns True if the tracker wrapped by the instance indicates that the processing runtime has been completed
822
- successfully and False otherwise."""
762
+ successfully at least once and that there is no ongoing processing that uses the target session."""
823
763
  try:
824
764
  # Acquires the lock
825
765
  lock = FileLock(self._lock_path)
@@ -840,8 +780,8 @@ class ProcessingTracker(YamlConfig):
840
780
 
841
781
  @property
842
782
  def encountered_error(self) -> bool:
843
- """Returns True if the tracker wrapped by the instance indicates that the processing runtime aborted due to
844
- encountering an error and False otherwise."""
783
+ """Returns True if the tracker wrapped by the instance indicates that the processing runtime for the target
784
+ session has aborted due to encountering an error."""
845
785
  try:
846
786
  # Acquires the lock
847
787
  lock = FileLock(self._lock_path)
@@ -863,7 +803,7 @@ class ProcessingTracker(YamlConfig):
863
803
  @property
864
804
  def is_running(self) -> bool:
865
805
  """Returns True if the tracker wrapped by the instance indicates that the processing runtime is currently
866
- running and False otherwise."""
806
+ running for the target session."""
867
807
  try:
868
808
  # Acquires the lock
869
809
  lock = FileLock(self._lock_path)