sl-shared-assets 1.0.0rc9__tar.gz → 1.0.0rc11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic.

Files changed (23)
  1. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/PKG-INFO +1 -1
  2. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/docs/source/api.rst +11 -0
  3. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/pyproject.toml +2 -1
  4. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/src/sl_shared_assets/__init__.py +0 -36
  5. sl_shared_assets-1.0.0rc9/src/sl_shared_assets/legacy_tools.py → sl_shared_assets-1.0.0rc11/src/sl_shared_assets/ascension_tools.py +47 -30
  6. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/src/sl_shared_assets/cli.py +13 -16
  7. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/src/sl_shared_assets/data_classes.py +173 -144
  8. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/src/sl_shared_assets/packaging_tools.py +1 -0
  9. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/src/sl_shared_assets/suite2p.py +14 -7
  10. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/.gitignore +0 -0
  11. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/LICENSE +0 -0
  12. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/README.md +0 -0
  13. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/docs/Makefile +0 -0
  14. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/docs/make.bat +0 -0
  15. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/docs/source/conf.py +0 -0
  16. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/docs/source/index.rst +0 -0
  17. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/docs/source/welcome.rst +0 -0
  18. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/envs/slsa_dev_lin.yml +0 -0
  19. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/envs/slsa_dev_lin_spec.txt +0 -0
  20. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/src/sl_shared_assets/py.typed +0 -0
  21. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/src/sl_shared_assets/server.py +0 -0
  22. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/src/sl_shared_assets/transfer_tools.py +0 -0
  23. {sl_shared_assets-1.0.0rc9 → sl_shared_assets-1.0.0rc11}/tox.ini +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sl-shared-assets
- Version: 1.0.0rc9
+ Version: 1.0.0rc11
  Summary: Stores assets shared between multiple Sun (NeuroAI) lab data pipelines.
  Project-URL: Homepage, https://github.com/Sun-Lab-NBB/sl-shared-assets
  Project-URL: Documentation, https://sl-shared-assets-api-docs.netlify.app/
@@ -17,6 +17,10 @@ Command Line Interfaces
  :prog: sl-generate-credentials
  :nested: full

+ .. click:: sl_shared_assets.cli:ascend_tyche_directory
+ :prog: sl-ascend
+ :nested: full
+
  Packaging Tools
  ===============
  .. automodule:: sl_shared_assets.packaging_tools
@@ -48,6 +52,13 @@ General Configuration and Data Storage Classes
  Compute Server Tools
  ====================
  .. automodule:: sl_shared_assets.server
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+ Ascension Tools
+ ===============
+ .. automodule:: sl_shared_assets.ascension_tools
  :members:
  :undoc-members:
  :show-inheritance:
@@ -8,7 +8,7 @@ build-backend = "hatchling.build"
  # Project metdata section. Provides the genral ID information about the project.
  [project]
  name = "sl-shared-assets"
- version = "1.0.0rc9"
+ version = "1.0.0rc11"
  description = "Stores assets shared between multiple Sun (NeuroAI) lab data pipelines."
  readme = "README.md"
  license = { file = "LICENSE" }
@@ -126,6 +126,7 @@ dev = [
  [project.scripts]
  sl-replace-root = "sl_shared_assets.cli:replace_local_root_directory"
  sl-generate-credentials = "sl_shared_assets.cli:generate_server_credentials_file"
+ sl-ascend = "sl_shared_assets.cli:ascend_tyche_directory"

  # Specifies files that should not be included in the source-code distribution but are also not part of gitignore.
  [tool.hatch.build.targets.sdist]
@@ -7,33 +7,16 @@ Authors: Ivan Kondratyev (Inkaros), Kushaan Gupta, Yuantao Deng

  from .server import Server, ServerCredentials
  from .suite2p import (
- Main,
- FileIO,
- Output,
- Channel2,
- NonRigid,
- ROIDetection,
- Registration,
- Classification,
- OnePRegistration,
- SignalExtraction,
- CellposeDetection,
- SpikeDeconvolution,
  Suite2PConfiguration,
  )
  from .data_classes import (
- RawData,
  DrugData,
  ImplantData,
  SessionData,
  SubjectData,
  SurgeryData,
- Destinations,
  InjectionData,
- MesoscopeData,
  ProcedureData,
- ProcessedData,
- PersistentData,
  ZaberPositions,
  ExperimentState,
  MesoscopePositions,
@@ -43,7 +26,6 @@ from .data_classes import (
  LickTrainingDescriptor,
  ExperimentConfiguration,
  MesoscopeExperimentDescriptor,
- replace_root_path,
  )
  from .transfer_tools import transfer_directory
  from .packaging_tools import calculate_directory_checksum
@@ -53,32 +35,15 @@ __all__ = [
  "Server",
  "ServerCredentials",
  # Suite2p module
- "Main",
- "FileIO",
- "Output",
- "Channel2",
- "NonRigid",
- "ROIDetection",
- "Registration",
- "Classification",
- "OnePRegistration",
- "SignalExtraction",
- "CellposeDetection",
- "SpikeDeconvolution",
  "Suite2PConfiguration",
  # Data classes module
- "RawData",
  "DrugData",
  "ImplantData",
  "SessionData",
  "SubjectData",
  "SurgeryData",
- "Destinations",
  "InjectionData",
- "MesoscopeData",
  "ProcedureData",
- "ProcessedData",
- "PersistentData",
  "ZaberPositions",
  "ExperimentState",
  "MesoscopePositions",
@@ -88,7 +53,6 @@ __all__ = [
  "LickTrainingDescriptor",
  "ExperimentConfiguration",
  "MesoscopeExperimentDescriptor",
- "replace_root_path",
  # Transfer tools module
  "transfer_directory",
  # Packaging tools module
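For reference, rc11 trims the package-root surface considerably: the twelve suite2p section classes and the internal data-layout sections (RawData, ProcessedData, PersistentData, MesoscopeData, Destinations), plus replace_root_path, are no longer re-exported at the top level. A minimal sketch of imports that still resolve against the reduced __all__ shown above:

    # Only names retained in the rc11 package-root __all__ are imported here.
    from sl_shared_assets import (
        SessionData,
        Suite2PConfiguration,
        transfer_directory,
        calculate_directory_checksum,
    )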
@@ -1,5 +1,6 @@
- """This module provides tools for working with old (legacy) data and data formats. Primarily, they are used to reformat
- old Tyche project data to make it compatible with modern Sun lab pipelines."""
+ """This module provides tools for translating ('ascending') old Tyche data to use the modern data structure used in the
+ Sun lab. The tools from this module will not work for any other data and also assume that the Tyche data has been
+ preprocessed with an early version of the Sun lab mesoscope processing pipeline."""

  from pathlib import Path
  import datetime
@@ -10,22 +11,27 @@ from ataraxis_base_utilities import LogLevel, console
  from ataraxis_time.time_helpers import extract_timestamp_from_bytes

  from .data_classes import SessionData, ProjectConfiguration
+ from .transfer_tools import transfer_directory
  from .packaging_tools import calculate_directory_checksum

+ # Ensures the console is enabled when this file is imported
+ if not console.enabled:
+ console.enable()
+

  def _generate_session_name(acquisition_path: Path) -> str:
  """Generates a session name using the last modification time of a zstack.mat or MotionEstimator.me file.

- This worker function uses one of the motion estimation files stored in each 'acquisition' subfolder of the original
- Tyche session data structure to generate a compatible timestamp-based session name. This is used to translate the
- original Tyche data hierarchy into the hierarchy used by all modern Sun lab projects and pipelines.
+ This worker function uses one of the motion estimation files stored in each Tyche 'acquisition' subfolder to
+ generate a modern Sun lab timestamp-based session name. This is used to translate the original Tyche session naming
+ pattern into the pattern used by all modern Sun lab projects and pipelines.

  Args:
  acquisition_path: The absolute path to the target acquisition folder. These folders are found under the 'day'
  folders for each animal, e.g.: Tyche-A7/2022_01_03/1.

  Returns:
- The generated session name for that acquisition.
+ The modernized session name.
  """

  # All well-formed sessions are expected to contain both the zstack.mat and the MotionEstimator.me file.
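For orientation, a rough sketch of the naming scheme this docstring describes, using only the standard library. The preference order between the two files and the exact timestamp format are assumptions; the real implementation relies on the ataraxis_time helpers imported above:

    from datetime import datetime, timezone
    from pathlib import Path

    def mock_session_name(acquisition_path: Path) -> str:
        # Prefers zstack.mat and falls back to MotionEstimator.me (assumed order).
        for file_name in ("zstack.mat", "MotionEstimator.me"):
            candidate = acquisition_path / file_name
            if candidate.exists():
                # Converts the last modification time into a sortable, timestamp-based name.
                modified = datetime.fromtimestamp(candidate.stat().st_mtime, tz=timezone.utc)
                return modified.strftime("%Y-%m-%d-%H-%M-%S-%f")
        raise FileNotFoundError(f"No motion estimation file found in {acquisition_path}.")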
@@ -60,21 +66,21 @@ def _generate_session_name(acquisition_path: Path) -> str:


  def _reorganize_data(session_data: SessionData, source_root: Path) -> bool:
- """Reorganizes and moves the session data from the source acquisition folder to the newly generated session
- hierarchy.
+ """Reorganizes and moves the session's data from the source folder in the old Tyche data hierarchy to the raw_data
+ folder in the newly created modern hierarchy.

- This worker function is used to physically rearrange the data from the original Tyche acquisition folder to the
- newly created session hierarchy. It both moves the existing files to their new destinations and renames certain
- files to match the latest naming convention used in the Sun lab.
+ This worker function is used to physically rearrange the data from the original Tyche data structure to the
+ new data structure. It both moves the existing files to their new destinations and renames certain files to match
+ the modern naming convention used in the Sun lab.

  Args:
- session_data: The initialized SessionData instance managing the newly created session data hierarchy.
- source_root: The absolute path to the source Tyche acquisition folder.
+ session_data: The initialized SessionData instance managing the 'ascended' (modernized) session data hierarchy.
+ source_root: The absolute path to the old Tyche data hierarchy folder that stores session's data.

  Returns:
- True if all expected data was found and moved. False, if any expected file was not found inside the folder, or
- the reorganization process otherwise behaved unexpectedly. If this function returns False, the folder is
- statically flagged for manual user intervention to investigate the issue.
+ True if the ascension process was successfully completed. False if the process encountered missing data or
+ otherwise did not go as expected. When the method returns False, the runtime function requests user intervention
+ to finalize the process manually.
  """

  # Resolves expected data targets:
@@ -168,10 +174,10 @@ def _reorganize_data(session_data: SessionData, source_root: Path) -> bool:


  def ascend_tyche_data(root_directory: Path, output_root_directory: Path, server_root_directory: Path) -> None:
- """Converts raw data from the Tyche project to use the modern Sun lab layout.
+ """Reformats the old Tyche data to use the modern Sun lab layout and metadata files.

- This function is used to convert old data to the modern data management standard. In turn, this allows using all
- modern data processing pipelines on this data.
+ This function is used to convert old Tyche data to the modern data management standard. This is used to make the
+ data compatible with the modern Sun lab data workflows.

  Notes:
  This function is statically written to work with the raw Tyche dataset featured in the OSM manuscript:
@@ -179,15 +185,19 @@ def ascend_tyche_data(root_directory: Path, output_root_directory: Path, server_
  preprocessed with the early Sun lab mesoscope compression pipeline. The function will not work for any other
  project or data hierarchy.

- This function does not automatically transfer the data to the Server. It only creates the necessary root
- hierarchy on the server and writes the necessary configuration to process the data on the Server, once it is
- manually transferred.
+ As part of its runtime, the function automatically transfers the ascended session data to the BioHPC server.
+ Since transferring the data over the network is the bottleneck of this pipeline, it runs in a single-threaded
+ mode and is constrained by the communication channel between the local machine and the BioHPC server. Calling
+ this function for a large number of sessions will result in a long processing time due to the network data
+ transfer.

  Args:
- root_directory: The root 'project' directory that stores individual Tyche animal folders to process.
- output_root_directory: The path to the root directory where to generate the converted Tyche project hierarchy.
- server_root_directory: The path to the SMB-mounted BioHPC server storage root directory. The Tyche project
- hierarchy is generated both locally and on the Server.
+ root_directory: The directory that stores one or more Tyche animal folders. This can be conceptualized as the
+ root directory for the Tyche project.
+ output_root_directory: The path to the local directory where to generate the converted Tyche project hierarchy.
+ Typically, this is the 'root' directory where all other Sun lab projects are stored.
+ server_root_directory: The path to the local filesystem-mounted BioHPC server storage directory. Note, this
+ directory hs to be mapped to the local filesystem via the SMB or equivalent protocol.
  """
  # Generates a (shared) project configuration file.
  project_configuration = ProjectConfiguration()
@@ -212,7 +222,7 @@ def ascend_tyche_data(root_directory: Path, output_root_directory: Path, server_

  # Dumps project configuration into the 'configuration' subfolder of the Tyche project.
  configuration_path = output_root_directory.joinpath("Tyche", "configuration", "project_configuration.yaml")
- project_configuration.to_path(path=configuration_path)
+ project_configuration.save(path=configuration_path)

  # Assumes that root directory stores all animal folders to be processed
  for animal_folder in root_directory.iterdir():
@@ -232,7 +242,7 @@ def ascend_tyche_data(root_directory: Path, output_root_directory: Path, server_
  # Uses derived session name and the statically created project configuration file to create the
  # session data hierarchy using the output root. This generates a 'standard' Sun lab directory structure
  # for the Tyche data.
- session_data = SessionData.create_session(
+ session_data = SessionData.create(
  session_name=session_name,
  animal_id=animal_name,
  project_configuration=project_configuration,
@@ -249,11 +259,18 @@ def ascend_tyche_data(root_directory: Path, output_root_directory: Path, server_
  f"Encountered issues when reorganizing {animal_name} session {session_name}. "
  f"User intervention is required to finish data reorganization process for this session."
  )
+ # noinspection PyTypeChecker
  console.echo(message=message, level=LogLevel.WARNING)
  else:
- # If the transfer process was successful, generates a new checksum for the moved data and removes
- # the now-empty acquisition folder.
+ # If the transfer process was successful, generates a new checksum for the moved data
  calculate_directory_checksum(directory=Path(session_data.raw_data.raw_data_path))
+ # Next, copies the data to the BioHPC server for further processing
+ transfer_directory(
+ source=Path(session_data.raw_data.raw_data_path),
+ destination=Path(session_data.destinations.server_raw_data_path),
+ verify_integrity=False,
+ )
+ # Finally, removes the now-empty old session data directory.
  acquisition_folder.rmdir()

  # If the loop above removed all acquisition folders, all data for that day has been successfully converted
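Taken together, the rc11 loop now checksums each ascended session, pushes it to the server, and only then removes the emptied source folder. A hypothetical direct invocation of the pipeline; all three paths are placeholders that must point at real, mounted directories:

    from pathlib import Path

    from sl_shared_assets.ascension_tools import ascend_tyche_data

    ascend_tyche_data(
        root_directory=Path("/media/Data/TycheRaw"),            # holds Tyche-A7, Tyche-A9, ...
        output_root_directory=Path("/media/Data/Experiments"),  # local Sun lab projects root
        server_root_directory=Path("/home/user/server/storage/sun_data"),  # SMB-mounted server root
    )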
@@ -1,6 +1,4 @@
- """This module stores the Command-Line Interfaces (CLIs) exposes by the library as part of the installation process.
- Primarily, these CLIs are used when setting up or reconfiguring the VRPC and other machines in the lab to work with
- sl-experiment and sl-forgery libraries."""
+ """This module stores the Command-Line Interfaces (CLIs) exposes by the library as part of the installation process."""

  from pathlib import Path

@@ -8,7 +6,7 @@ import click

  from .server import generate_server_credentials
  from .data_classes import replace_root_path
- from .legacy_tools import ascend_tyche_data
+ from .ascension_tools import ascend_tyche_data


  @click.command()
@@ -79,34 +77,33 @@ def generate_server_credentials_file(output_directory: str, host: str, username:
  "--path",
  type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
  required=True,
- prompt="Enter the absolute path to the root directory storing Tyche animal folders to ascend (modernize): ",
- help="The path to the root directory storing Tyche animal folders to ascend (modernize).",
+ prompt="Enter the absolute path to the directory that stores original Tyche animal folders: ",
+ help="The absolute path to the directory that stores original Tyche animal folders.",
  )
  @click.option(
  "-o",
  "--output_directory",
  type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
  required=True,
- prompt="Enter the path to the local directory where to create the ascended Tyche project hierarchy: ",
- help="The path to the local directory where to create the ascended Tyche project hierarchy.",
+ prompt="Enter the absolute path to the local directory where to create the ascended Tyche project hierarchy: ",
+ help="The absolute path to the local directory where to create the ascended Tyche project hierarchy.",
  )
  @click.option(
  "-s",
  "--server_directory",
  type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
  required=True,
- prompt="Enter the path to the SMB-mounted BioHPC server directory that will be used to store the ascended data: ",
- help="The path to the SMB-mounted BioHPC server directory that will be used to store the ascended data.",
+ prompt=(
+ "Enter the path to the SMB-mounted BioHPC server directory where to create the ascended Tyche project "
+ "hierarchy: "
+ ),
+ help="The path to the SMB-mounted BioHPC server directory where to create the ascended Tyche project hierarchy.",
  )
  def ascend_tyche_directory(path: str, output_directory: str, server_directory: str) -> None:
- """Restructures all original Tyche folders to use the modern Sun lab data structure.
+ """Restructures old Tyche project data to use the modern Sun lab data structure.

- This CLI is used to convert the old Tyche data to make it compatible with modern Sun lab processing pipelines and
- data management workflows. This process is commonly referred to as 'ascension' amongst lab engineers. After
+ This CLI is used to convert ('ascend') the old Tyche project data to the modern Sun lab structure. After
  ascension, the data can be processed and analyzed using all modern Sun lab (sl-) tools and libraries.
-
- Note! This CLi does NOT move the data to the BioHPC server. The data has to be manually transferred to the server
- before it can be processed using our server-side pipelines.
  """
  ascend_tyche_data(
  root_directory=Path(path),
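Once the package is installed, the same runtime is reachable from the shell through the sl-ascend entry point registered in pyproject.toml, e.g. sl-ascend -p <tyche_root> -o <local_projects_root> -s <mounted_server_root> (all three values are placeholders for real directories). Judging by the required=True and prompt= settings above, any option omitted on the command line is collected interactively instead.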
@@ -1,6 +1,7 @@
- """This module provides classes used to store various data used by the sl-experiment and the sl-forgery libraries.
- This includes classes used to store the data generated during acquisition and preprocessing and classes used to manage
- the runtime of other libraries (configuration data classes)."""
+ """This module provides classes used to store various data used by other Sun lab data acquisition and processing
+ libraries.This includes classes used to store the data generated during acquisition and preprocessing and classes used
+ to manage the runtime of other libraries (configuration data classes). Most classes from these modules are used by the
+ major libraries 'sl-experiment' and 'sl-forgery'."""

  import re
  import copy
@@ -14,15 +15,19 @@ from ataraxis_base_utilities import LogLevel, console, ensure_directory_exists
  from ataraxis_data_structures import YamlConfig
  from ataraxis_time.time_helpers import get_timestamp

+ # Ensures console is enabled when this file is imported
+ if not console.enabled:
+ console.enable()
+

  def replace_root_path(path: Path) -> None:
  """Replaces the path to the local root directory used to store all Sun lab projects with the provided path.

- When ProjectConfiguration class is instantiated for the first time on a new machine, it asks the user to provide
- the path to the local directory where to save all Sun lab projects. This path is then stored inside the default
- user data directory as a .yaml file to be reused for all future projects. To support replacing this path without
- searching for the user data directory, which is usually hidden, this function finds and updates the contents of the
- file that stores the local root path.
+ The first time ProjectConfiguration class is instantiated to create a new project on a new machine,
+ it asks the user to provide the path to the local directory where to save all Sun lab projects. This path is then
+ stored inside the default user data directory as a .yaml file to be reused for all future projects. To support
+ replacing this path without searching for the user data directory, which is usually hidden, this function finds and
+ updates the contents of the file that stores the local root path.

  Args:
  path: The path to the new local root directory.
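A minimal usage sketch of the function documented above; the target path is a placeholder for the new projects root:

    from pathlib import Path

    from sl_shared_assets.data_classes import replace_root_path

    # Points all future ProjectConfiguration lookups at the new local root directory.
    replace_root_path(path=Path("/media/BigData/Experiments"))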
@@ -49,53 +54,65 @@ class ProjectConfiguration(YamlConfig):

  An instance of this class is generated and saved as a .yaml file in the 'configuration' directory of each project
  when it is created. After that, the stored data is reused for every runtime (training or experiment session) carried
- out for each animal of the project.
+ out for each animal of the project. Additionally, a copy of the most actual configuration file is saved inside each
+ runtime session's 'raw_data' folder, providing seamless integration between the managed data and various Sun lab
+ (sl-) libraries.

  Notes:
- This class allows flexibly configuring sl_experiment and sl_forgery libraries for different projects in the
- Sun lab. This allows hiding most inner workings of all libraries from the end-users, while providing a robust,
- machine-independent way to interface with all data acquisition and processing libraries.
+ Together with SessionData, this class forms the entry point for all interactions with the data acquired in the
+ Sun lab. The fields of this class are used to flexibly configure the runtime behavior of major data acquisition
+ (sl-experiment) and processing (sl-forgery) libraries, adapting them for any project in the lab.
+
+ Most lab projects only need to adjust the "surgery_sheet_id" and "water_log_sheet_id" fields of the class. Most
+ fields in this class are used by the sl-experiment library to generate the SessionData class instance for each
+ session and during experiment data acquisition and preprocessing. Data processing pipelines use specialized
+ configuration files stored in other modules of this library.

- Most lab projects only need to adjust the "surgery_sheet_id" and "water_log_sheet_id" fields of the class.
+ Although all path fields use str | Path datatype, they are always stored as Path objects. These fields are
+ converted to strings only when the data is dumped as a .yaml file.
  """

  project_name: str = ""
  """Stores the descriptive name of the project. This name is used to create the root directory for the project and
  to discover and load project's data during runtime."""
  surgery_sheet_id: str = ""
- """The ID of the Google Sheet file that stores surgery information for the animal whose data is managed by this
- instance. This is used to parse and write the surgery data for each managed animal into its 'metadata' folder, so
- that the surgery data is always kept together with the rest of the training and experiment data."""
+ """The ID of the Google Sheet file that stores information about surgical interventions performed on all animals
+ participating in the managed project. This log sheet is used to parse and write the surgical intervention data for
+ each animal into every runtime session raw_data folder, so that the surgery data is always kept together with the
+ rest of the training and experiment data."""
  water_log_sheet_id: str = ""
- """The ID of the Google Sheet file that stores water restriction information for the animal whose data is managed
- by this instance. This is used to synchronize the information inside the water restriction log with the state of
- the animal at the end of each training or experiment session.
+ """The ID of the Google Sheet file that stores information about water restriction (and behavior tracker)
+ information for all animals participating in the managed project. This is used to synchronize the information
+ inside the water restriction log with the state of the animal at the end of each training or experiment session.
  """
  google_credentials_path: str | Path = Path("/media/Data/Experiments/sl-surgery-log-0f651e492767.json")
  """
  The path to the locally stored .JSON file that contains the service account credentials used to read and write
- Google Sheet data. This is used to access and work with the surgery log and the water restriction log. Usually, the
- same service account is used across all projects.
+ Google Sheet data. This is used to access and work with the surgery log and the water restriction log files.
+ Usually, the same service account is used across all projects.
  """
  server_credentials_path: str | Path = Path("/media/Data/Experiments/server_credentials.yaml")
  """
  The path to the locally stored .YAML file that contains the credentials for accessing the BioHPC server machine.
- While the storage (filesystem) of the server machine should already be mounted to the local PC via SMB, this data
- is used to establish SSH connection to the machine and start data processing after it is transferred to the server.
- This way, our data acquisition, preprocessing, and processing are controlled by the same runtime.
+ While the filesystem of the server machine should already be mounted to the local machine via SMB or equivalent
+ protocol, this data is used to establish SSH connection to the server and start newly acquired data processing
+ after it is transferred to the server. This allows data acquisition, preprocessing, and processing to be controlled
+ by the same runtime and prevents unprocessed data from piling up on the server.
  """
  local_root_directory: str | Path = Path("/media/Data/Experiments")
- """The absolute path to the root directory where all projects are stored on the local host-machine (VRPC). Note,
- overwriting the value of this field is pointless, as it is automatically set each time the class is instantiated."""
+ """The absolute path to the directory where all projects are stored on the local host-machine (VRPC). Note,
+ this field is configured automatically each time the class is instantiated through any method, so overwriting it
+ manually will not be respected."""
  local_server_directory: str | Path = Path("/home/cybermouse/server/storage/sun_data")
- """The absolute path to the locally-mapped (via SMB protocol) root BioHPC server machine directory where to store
- all projects."""
+ """The absolute path to the directory where all projects are stored on the BioHPC server. This directory should be
+ locally accessible (mounted) using a network sharing protocol, such as SMB."""
  local_nas_directory: str | Path = Path("/home/cybermouse/nas/rawdata")
- """The absolute path to the locally-mapped (via SMB protocol) root Synology NAS directory where to store all
- projects."""
+ """The absolute path to the directory where all projects are stored on the Synology NAS. This directory should be
+ locally accessible (mounted) using a network sharing protocol, such as SMB."""
  local_mesoscope_directory: str | Path = Path("/home/cybermouse/scanimage/mesodata")
- """The absolute path to the locally-mapped (via SMB protocol) root mesoscope (ScanImagePC) directory where all
- mesoscope-acquired data is aggregated during runtime."""
+ """The absolute path to the root mesoscope (ScanImagePC) directory where all mesoscope-acquired data is aggregated
+ during acquisition runtime. This directory should be locally accessible (mounted) using a network sharing
+ protocol, such as SMB."""
  remote_storage_directory: str | Path = Path("/storage/sun_data")
  """The absolute path, relative to the BioHPC server root, to the directory where all projects are stored on the
  slow (SSD) volume of the server. This path is used when running remote (server-side) jobs and, therefore, has to
@@ -111,8 +128,7 @@ class ProjectConfiguration(YamlConfig):
  right_camera_index: int = 2
  """The index of the right body camera in the list of all available OpenCV-managed cameras."""
  harvesters_cti_path: str | Path = Path("/opt/mvIMPACT_Acquire/lib/x86_64/mvGenTLProducer.cti")
- """The path to the GeniCam CTI file used to connect to Harvesters-managed cameras. Currently, this is only used by
- the face camera."""
+ """The path to the GeniCam CTI file used to connect to Harvesters-managed cameras."""
  actor_port: str = "/dev/ttyACM0"
  """The USB port used by the Actor Microcontroller."""
  sensor_port: str = "/dev/ttyACM1"
@@ -124,10 +140,10 @@
  lickport_port: str = "/dev/ttyUSB1"
  """The USB port used by the LickPort Zaber motor controllers (devices)."""
  unity_ip: str = "127.0.0.1"
- """The IP address of the MQTT broker used to communicate with the Unity game engine. Note, this is only used during
+ """The IP address of the MQTT broker used to communicate with the Unity game engine. This is only used during
  experiment runtimes. Training runtimes ignore this parameter."""
  unity_port: int = 1883
- """The port number of the MQTT broker used to communicate with the Unity game engine. Note, this is only used during
+ """The port number of the MQTT broker used to communicate with the Unity game engine. This is only used during
  experiment runtimes. Training runtimes ignore this parameter."""
  valve_calibration_data: dict[int | float, int | float] | tuple[tuple[int | float, int | float], ...] = (
  (15000, 1.8556),
@@ -135,46 +151,42 @@
  (45000, 7.1846),
  (60000, 10.0854),
  )
- """A dictionary or tuple of tuples that maps valve open times, in microseconds, to the dispensed volume of water,
- in microliters. During runtime, this data is used by the ValveModule to translate the requested reward volumes into
- times the valve needs to be open to deliver the desired volume.
+ """A tuple of tuples that maps water delivery solenoid valve open times, in microseconds, to the dispensed volume
+ of water, in microliters. During training and experiment runtimes, this data is used by the ValveModule to translate
+ the requested reward volumes into times the valve needs to be open to deliver the desired volume of water.
  """

  @classmethod
  def load(cls, project_name: str, configuration_path: None | Path = None) -> "ProjectConfiguration":
- """Loads the project configuration parameters from a project_configuration.yaml file and uses the loaded data
- to initialize the ProjectConfiguration instance.
+ """Loads the project configuration parameters from a project_configuration.yaml file.

- This method is called for each session runtime to reuse the configuration parameters generated at project
- creation. When it is called for the first time (during new project creation), the method generates the default
- configuration file and prompts the user to update the configuration before proceeding with the runtime.
+ This method is called during each interaction with any runtime session's data, including the creation of a new
+ session. When this method is called for a non-existent (new) project name, it generates the default
+ configuration file and prompts the user to update the configuration before proceeding with the runtime. All
+ future interactions with the sessions from this project reuse the existing configuration file.

  Notes:
  As part of its runtime, the method may prompt the user to provide the path to the local root directory.
- This directory stores all project subdirectories and acts as the top level of the local data hierarchy.
- The path to the directory will be saved inside user's default data directory, so that it can be reused for
- all future projects. Use sl-replace_root_path CLI to replace the path that is saved in this way.
+ This directory stores all project subdirectories and acts as the top level of the Sun lab data hierarchy.
+ The path to the directory is then saved inside user's default data directory, so that it can be reused for
+ all future projects. Use sl-replace-root CLI to replace the saved root directory path.

- Since this class is used during both data acquisition and processing on different machines, this method
- supports multiple ways of initializing the class. Use the project_name on the VRPC (via the sl_experiment
- library). Use the configuration path on the BioHPC server (via the sl_forgery library).
+ Since this class is used for all Sun lab data structure interactions, this method supports multiple ways of
+ loading class data. If this method is called as part of the sl-experiment new session creation pipeline, use
+ 'project_name' argument. If this method is called as part of the sl-forgery data processing pipeline(s), use
+ 'configuration_path' argument.

  Args:
- project_name: The name of the project whose configuration file needs to be discovered and loaded. Note, this
- way of resolving the project is the default way on the VRPC. When processing data on the server, the
- pipeline preferentially uses the configuration_path.
- configuration_path: The path to the project_configuration.yaml file from which to load the data. This is
- an optional way of resolving the configuration data source that always takes precedence over the
- project_name when both are provided.
+ project_name: The name of the project whose configuration file needs to be discovered and loaded or, if the
+ project does not exist, created.
+ configuration_path: Optional. The path to the project_configuration.yaml file from which to load the data.
+ This way of resolving the configuration data source always takes precedence over the project_name when
+ both are provided.

  Returns:
- An initialized ProjectConfiguration instance.
+ The initialized ProjectConfiguration instance that stores the configuration data for the target project.
  """

- # Ensures console is enabled
- if not console.enabled:
- console.enable()
-
  # If the configuration path is not provided, uses the 'default' resolution strategy that involves reading the
  # user's data directory
  if configuration_path is None:
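To make the calibration mapping concrete, the sketch below linearly interpolates a requested reward volume into a valve open time. This is illustrative only: the actual conversion lives in the ValveModule of the data-acquisition library, and only the calibration points visible in this diff are used (any entries elided between the hunks are omitted):

    # Calibration points visible in this diff: (open time in microseconds, dispensed volume in microliters).
    CALIBRATION = ((15000, 1.8556), (45000, 7.1846), (60000, 10.0854))

    def open_time_for_volume(volume_ul: float) -> float:
        # Clamps requests outside the calibrated range to the edge points.
        if volume_ul <= CALIBRATION[0][1]:
            return float(CALIBRATION[0][0])
        for (t0, v0), (t1, v1) in zip(CALIBRATION, CALIBRATION[1:]):
            if volume_ul <= v1:
                # Linear interpolation between the two bracketing calibration points.
                return t0 + (t1 - t0) * (volume_ul - v0) / (v1 - v0)
        return float(CALIBRATION[-1][0])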
@@ -191,6 +203,7 @@ class ProjectConfiguration(YamlConfig):
  "directory that stores all project-specific directories. This is required when resolving project "
  "configuration based on project's name."
  )
+ # noinspection PyTypeChecker
  console.echo(message=message, level=LogLevel.WARNING)
  root_path_str = input("Local root path: ")
  root_path = Path(root_path_str)
@@ -223,6 +236,7 @@ class ProjectConfiguration(YamlConfig):
  f"proceeding further to avoid runtime errors. Also, edit other configuration precursors saved to the "
  f"same directory to control other aspects of data acquisition and processing."
  )
+ # noinspection PyTypeChecker
  console.echo(message=message, level=LogLevel.WARNING)

  # Generates the default project configuration instance and dumps it as a .yaml file. Note, as part of
@@ -230,7 +244,7 @@ class ProjectConfiguration(YamlConfig):
  # user.
  precursor = ProjectConfiguration(local_root_directory=Path(str(configuration_path.parents[2])))
  precursor.project_name = project_name
- precursor.to_path(path=configuration_path)
+ precursor.save(path=configuration_path)

  # Waits for the user to manually configure the newly created file.
  input(f"Enter anything to continue: ")
@@ -263,16 +277,16 @@ class ProjectConfiguration(YamlConfig):
  # Returns the initialized class instance to caller
  return instance

- def to_path(self, path: Path) -> None:
- """Saves the instance data to disk as a project_configuration.yaml file.
+ def save(self, path: Path) -> None:
+ """Saves class instance data to disk as a project_configuration.yaml file.

- This method is automatically called when the project is created. All future runtimes should use the load()
- method to load and reuse the configuration data saved to the .yaml file.
+ This method is automatically called when a new project is created. After this method's runtime, all future
+ calls to the load() method will reuse the configuration data saved to the .yaml file.

  Notes:
- This method also generates and dumps multiple other 'precursor' configuration files into the folder. This
- includes the example 'default' experiment configuration and the DeepLabCut and Suite2P configuration files
- used during data processing.
+ When this method is used to generate the configuration .yaml file for a new project, it also generates the
+ example 'default_experiment.yaml'. This file is designed to showcase how to write ExperimentConfiguration
+ data files that are used to control Mesoscope-VR system states during experiment session runtimes.

  Args:
  path: The path to the .yaml file to save the data to.
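A minimal sketch of the renamed persistence pair (the former to_path() is now save()); the project name and output path are placeholders:

    from pathlib import Path

    from sl_shared_assets.data_classes import ProjectConfiguration

    # Discovers (or, for a new project, scaffolds) the configuration by project name.
    config = ProjectConfiguration.load(project_name="Tyche")

    # Persists any edits back to the project's configuration folder.
    config.save(path=Path("/media/Data/Experiments/Tyche/configuration/project_configuration.yaml"))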
@@ -301,15 +315,18 @@ class ProjectConfiguration(YamlConfig):
  original.to_yaml(file_path=path)

  # As part of this runtime, also generates and dumps the 'precursor' experiment configuration file.
- example_experiment = ExperimentConfiguration()
- example_experiment.to_yaml(path.parent.joinpath("default_experiment.yaml"))
+ experiment_configuration_path = path.parent.joinpath("default_experiment.yaml")
+ if not experiment_configuration_path.exists():
+ example_experiment = ExperimentConfiguration()
+ example_experiment.to_yaml(experiment_configuration_path)

  def _verify_data(self) -> None:
- """Verifies the data loaded from the project_configuration.yaml file to ensure its validity.
+ """Verifies the user-modified data loaded from the project_configuration.yaml file.

  Since this class is explicitly designed to be modified by the user, this verification step is carried out to
  ensure that the loaded data matches expectations. This reduces the potential for user errors to impact the
- runtime behavior of the library. This internal method is automatically called by the load() method.
+ runtime behavior of the libraries using this class. This internal method is automatically called by the load()
+ method.

  Notes:
  The method does not verify all fields loaded from the configuration file and instead focuses on fields that
@@ -362,7 +379,7 @@ class RawData:
  includes .mp4 video files from each recorded camera."""
  mesoscope_data_path: str | Path
  """Stores the path to the directory that contains all Mesoscope data acquired during the session. Primarily, this
- includes the mesoscope-acquired .tif files (brain activity data) and the motion estimation data."""
+ includes the mesoscope-acquired .tiff files (brain activity data) and the motion estimation data."""
  behavior_data_path: str | Path
  """Stores the path to the directory that contains all behavior data acquired during the session. Primarily, this
  includes the .npz log files used by data-acquisition libraries to store all acquired data. The data stored in this
@@ -465,10 +482,10 @@ class RawData:

  Args:
  new_root: The new root directory to use for all paths inside the instance. This has to be the path to the
- root session directory: pc_root/project/animal/session.
+ directory that stores all Sun lab projects on the target machine.
  """
  # Gets current root from the raw_data_path.
- old_root = Path(self.raw_data_path).parents[2]
+ old_root = Path(self.raw_data_path).parents[3]

  # Updates all paths by replacing old_root with new_root
  self.raw_data_path = new_root.joinpath(Path(self.raw_data_path).relative_to(old_root))
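Why the index moved from parents[2] to parents[3]: for a raw_data path of the form <projects_root>/<project>/<animal>/<session>/raw_data, parents[2] is the project directory, while parents[3] is the projects root that change_root() is now documented to expect. A self-contained illustration (the session name is a placeholder):

    from pathlib import Path

    raw_data_path = Path("/media/Data/Experiments/Tyche/Tyche-A7/2022-01-03-12-00-00-000000/raw_data")

    assert raw_data_path.parents[2] == Path("/media/Data/Experiments/Tyche")  # project directory (old index)
    assert raw_data_path.parents[3] == Path("/media/Data/Experiments")        # projects root (new index)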
@@ -494,9 +511,9 @@ class RawData:
  class ProcessedData:
  """Stores the paths to the directories and files that make up the 'processed_data' session directory.

- The processed_data directory stores the processed session data, which is generated by running various processing
- pipelines. These pipelines use raw data to generate processed data, which represents an intermediate step between
- raw data and the dataset used in the data analysis.
+ The processed_data directory stores the data generated by various processing pipelines from the raw data. Processed
+ data represents an intermediate step between raw data and the dataset used in the data analysis, but is not itself
+ designed to be analyzed.

  Notes:
  The paths from this section are typically used only on the BioHPC server. This is because most data processing
@@ -572,10 +589,10 @@ class ProcessedData:

  Args:
  new_root: The new root directory to use for all paths inside the instance. This has to be the path to the
- root session directory: pc_root/project/animal/session.
+ directory that stores all Sun lab projects on the target machine.
  """
  # Gets current root from the processed_data_path.
- old_root = Path(self.processed_data_path).parents[2]
+ old_root = Path(self.processed_data_path).parents[3]

  # Updates all paths by replacing old_root with new_root
  self.processed_data_path = new_root.joinpath(Path(self.processed_data_path).relative_to(old_root))
@@ -730,30 +747,30 @@ class Destinations:

  @dataclass
  class SessionData(YamlConfig):
- """Provides methods for managing the data of a single experiment or training session across all destinations.
+ """Stores and manages the data layout of a single training or experiment session acquired using the Sun lab
+ Mesoscope-VR system.

- The primary purpose of this class is to maintain the session data structure across all supported destinations. It
- generates the paths used by all other classes from this library and classes from sl-experiment and sl-forgery
- libraries.
+ The primary purpose of this class is to maintain the session data structure across all supported destinations and
+ during all processing stages. It generates the paths used by all other classes from all Sun lab libraries that
+ interact with the session's data from the point of its creation and until the data is integrated into an
+ analysis dataset.

- If necessary, the class can be used to either generate a new session or to load an already existing session's data.
- When the class is used to create a new session, it automatically resolves the new session's name using the current
- UTC timestamp, down to microseconds. This ensures that each session name is unique and preserves the overall
+ When necessary, the class can be used to either generate a new session or load the layout of an already existing
+ session. When the class is used to create a new session, it generates the new session's name using the current
+ UTC timestamp, accurate to microseconds. This ensures that each session name is unique and preserves the overall
  session order.

  Notes:
  If this class is instantiated on the VRPC, it is expected that the BioHPC server, Synology NAS, and ScanImagePC
- data directories are mounted on the local host-machine via the SMB or equivalent protocol. All manipulations
- with these destinations are carried out with the assumption that the OS has full access to these directories
- and filesystems.
-
- If this class is instantiated on the BioHPC server, some methods from this class will not work as expected. It
- is essential that this class is not used outside the default sl-experiment and sl-forgery library runtimes to
- ensure it is used safely.
+ data directories are mounted on the local filesystem via the SMB or equivalent protocol. All manipulations
+ with these destinations are carried out with the assumption that the local OS has full access to these
+ directories and filesystems.

  This class is specifically designed for working with the data from a single session, performed by a single
  animal under the specific experiment. The class is used to manage both raw and processed data. It follows the
- data through acquisition, preprocessing and processing stages of the Sun lab data workflow.
+ data through acquisition, preprocessing and processing stages of the Sun lab data workflow. Together with
+ ProjectConfiguration class, this class serves as an entry point for all interactions with the managed session's
+ data.
  """

  project_name: str
@@ -767,31 +784,39 @@ class SessionData(YamlConfig):
767
784
  to be set to one of the four supported types: 'Lick training', 'Run training', 'Window checking' or 'Experiment'.
768
785
  """
769
786
  experiment_name: str | None
770
- """Stores the name of the experiment configuration file. If the session_type field is set to 'Experiment', this
771
- field is used to communicate the specific experiment configuration used by the session. During runtime, this is
772
- used to load the experiment configuration (to run the experiment) and to save the experiment configuration to the
773
- session raw_data folder. If the session is not an experiment session, this is statically set to None."""
787
+ """Stores the name of the experiment configuration file. If the session_type field is set to 'Experiment' and this
788
+ field is not None (null), it communicates the specific experiment configuration used by the session. During runtime,
789
+ the name stored here is used to load the specific experiment configuration data stored in a .yaml file with the
790
+ same name. If the session is not an experiment session, this field is ignored."""
774
791
  raw_data: RawData
775
- """Stores the paths to various directories and files used to store raw and preprocessed session data. Depending on
776
- class initialization location (VRPC or BioHPC server), the class automatically resolves the root directory path to
777
- either the VRPC project directory or the BioHPC cluster storage volume."""
792
+ """This section stores the paths to various directories and files that make up the raw_data subfolder. This
793
+ subfolder stores all data acquired during training or experiment runtimes before and after preprocessing. Note, the
794
+ preprocessing does not change the raw data in any way other than lossless compression and minor format
795
+ reorganization. Therefore, the data is considered 'raw' both before and after preprocessing."""
778
796
  processed_data: ProcessedData
779
- """Stores the paths to various directories used to store processed session data. Note, when this section is
780
- resolved for VRPC, it uses the same local session directory as the raw_data folder. When this is resolved for the
781
- BioHPC server, it uses the 'fast' volume path."""
797
+ """This section stores the paths to various directories used to store processed session data. Processed data is
798
+ generated from raw data by running various processing pipelines, such as suite2p, DeepLabCut and Sun lab's behavior
799
+ parsing pipelines. Typically, all data is processed on the BioHPC server and may be stored on a filesystem volume
800
+ different from the one that stores the raw data."""
782
801
  persistent_data: PersistentData
783
- """Stores the paths to various files and directories kept on VRPC and ScanImagePC after the session data is
784
- transferred to long-term storage destinations."""
802
+ """This section stores the paths to various files and directories that are held back on the VRPC and ScanImagePC
803
+ after the session data is transferred to long-term storage destinations as part of preprocessing. Typically, this
804
+ data is reused during the acquisition of future runtime session data. This section is not used during data
805
+ processing."""
785
806
  mesoscope_data: MesoscopeData
786
- """Stores the paths to various directories used by the ScanImagePC to store mesoscope-acquired session data,
787
- before it is moved to the VRPC during preprocessing."""
807
+ """This section stores the paths to various directories used by the ScanImagePC when acquiring mesoscope-related
808
+ data. During runtime, the VRPC (behavior data and experiment control) and the ScanImagePC (brain activity data and
809
+ Mesoscope control) operate mostly independently of each-other. During preprocessing, the VRPC pulls the data from
810
+ the ScanImagePC, using the paths in this section to find the data to be transferred. This section is not used
811
+ during data processing."""
788
812
  destinations: Destinations
789
- """Stores the paths to the destination directories on the BioHPC server and Synology NAS, to which the data is
790
- copied as part of preprocessing. Both of these directories should be accessible for the VRPC's filesystem via an
791
- SMB or equivalent protocol."""
813
+ """This section stores the paths to the destination directories on the BioHPC server and Synology NAS, to which the
814
+ data is copied as part of preprocessing for long-term storage and further processing. Both of these directories
815
+ should be mapped (mounted) to the VRPC's filesystem via the SMB or equivalent protocol. This section is not used
816
+ during data processing."""
792
817
 
793
818
  @classmethod
794
- def create_session(
819
+ def create(
795
820
  cls,
796
821
  animal_id: str,
797
822
  session_type: str,
@@ -799,35 +824,38 @@ class SessionData(YamlConfig):
799
824
  experiment_name: str | None = None,
800
825
  session_name: str | None = None,
801
826
  ) -> "SessionData":
802
- """Creates a new SessionData object and uses it to generate the session's data structure.
827
+ """Creates a new SessionData object and generates the new session's data structure.
803
828
 
804
- This method is used to initialize new session runtimes. It always assumes it is called on the VRPC and, as part
- of its runtime, resolves and generates the necessary local and ScanImagePC directories to support acquiring and
- preprocessing session's data.
+ This method is called by sl-experiment runtimes that create new training or experiment sessions to generate the
+ session data directory tree. It always assumes it is called on the VRPC and, as part of its runtime, resolves
+ and generates the necessary local and ScanImagePC directories to support acquiring and preprocessing the
+ session's data.

  Notes:
- To load an already existing session data structure, use the load_session() method instead.
+ To load an already existing session data structure, use the load() method instead.

  This method automatically dumps the data of the created SessionData instance into the session_data.yaml file
  inside the root raw_data directory of the created hierarchy. It also finds and dumps other configuration
- files, such as project_configuration.yaml, suite2p_configuration.yaml, and experiment_configuration.yaml.
- This way, if the session's runtime is interrupted unexpectedly, it can still be processed.
+ files, such as project_configuration.yaml and experiment_configuration.yaml, into the same raw_data
+ directory. This ensures that if the session's runtime is interrupted unexpectedly, the acquired data can
+ still be processed.

  Args:
  animal_id: The ID code of the animal for which the data is acquired.
  session_type: The type of the session. Primarily, this determines how to read the session_descriptor.yaml
- file. Valid options are 'Lick training', 'Run training', or 'Experiment'.
- experiment_name: The name of the experiment to be executed as part of this session. This option is only used
- for 'Experiment' session types. It is used to find the target experiment configuration .YAML file and
- copy it into the session's raw_data directory.
- project_configuration: The initialized ProjectConfiguration instance that stores the data for the session's
- project. This is used to determine the root directory paths for all PCs used in the data workflow.
+ file. Valid options are 'Lick training', 'Run training', 'Window checking', or 'Experiment'.
+ experiment_name: The name of the experiment executed during the managed session. This optional argument is
+ only used for 'Experiment' session types. It is used to find the experiment configuration .YAML file.
+ project_configuration: The initialized ProjectConfiguration instance that stores the session's project
+ configuration data. This is used to determine the root directory paths for all lab machines used during
+ data acquisition and processing.
  session_name: An optional session_name override. Generally, this argument should not be provided for most
- use cases. When provided, the method uses this name instead of generating a new timestamp-based name.
- This is only used when reformatting other data structures to follow Sun lab structure.
+ sessions. When provided, the method uses this name instead of generating a new timestamp-based name.
+ This is only used during the 'ascension' runtime to convert old data structures to modern
+ lab standards.

  Returns:
- An initialized SessionData instance for the newly created session.
+ An initialized SessionData instance that stores the layout of the newly created session's data.
  """

  # Acquires the UTC timestamp to use as the session name
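For orientation, a minimal usage sketch of the renamed create() entry point follows, built only from the argument list documented above. The import path, the ProjectConfiguration.from_yaml() loader, and all concrete values are assumptions for illustration, not confirmed package API.

    from pathlib import Path

    from sl_shared_assets.data_classes import ProjectConfiguration, SessionData  # import path assumed

    # Assumption: ProjectConfiguration, like SessionData, is a YamlConfig subclass and
    # can be restored with from_yaml(). The file path and IDs below are placeholders.
    project_configuration = ProjectConfiguration.from_yaml(
        file_path=Path("project_configuration.yaml")
    )

    # Omitting session_name makes the method generate a timestamp-based name; the
    # override exists only for the 'ascension' (legacy data conversion) runtime.
    session = SessionData.create(
        animal_id="A001",
        session_type="Lick training",
        project_configuration=project_configuration,
    )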
@@ -929,7 +957,7 @@ class SessionData(YamlConfig):

  # Saves the configured instance data to the session's folder, so that it can be reused during processing or
  # preprocessing
- instance._to_path()
+ instance._save()

  # Extracts and saves the necessary configuration classes to the session raw_data folder. Note, this list of
  # classes is not exhaustive. More classes are saved as part of the session runtime management class start() and
@@ -955,30 +983,31 @@ class SessionData(YamlConfig):
  return instance

  @classmethod
- def load_session(
+ def load(
  cls,
  session_path: Path,
  on_server: bool,
  ) -> "SessionData":
- """Loads the SessionData instance from the session_data.yaml file of the target session.
+ """Loads the SessionData instance from the target session's session_data.yaml file.

- This method is used to load the data for an already existing session. This is used to call preprocessing
- or processing runtime(s) for the target session. Depending on the call location, the method automatically
- resolves all necessary paths and creates the necessary directories.
+ This method is used to load the data layout information of an already existing session. Primarily, this is used
+ when preprocessing or processing session data. Depending on the call location (machine), the method
+ automatically resolves all necessary paths and creates the necessary directories.

  Notes:
- To create a new session, use the create_session() method instead.
+ To create a new session, use the create() method instead.

  Args:
  session_path: The path to the root directory of an existing session, e.g.: vrpc_root/project/animal/session.
  on_server: Determines whether the method is used to initialize an existing session on the VRPC or the
- BioHPC server.
+ BioHPC server. Note, VRPC runtimes use the same 'root' directory to store raw_data and processed_data
+ subfolders. BioHPC server runtimes use different volumes (drives) to store these subfolders.

  Returns:
  An initialized SessionData instance for the session whose data is stored at the provided path.

  Raises:
- FileNotFoundError: If the 'session_data.yaml' file is not found after resolving the provided path.
+ FileNotFoundError: If the 'session_data.yaml' file is not found under the session_path/raw_data/ subfolder.
  """
  # To properly initialize the SessionData instance, the provided path should contain the raw_data directory
  # with session_data.yaml file.
@@ -993,13 +1022,13 @@ class SessionData(YamlConfig):
  console.error(message=message, error=FileNotFoundError)

  # Loads class data from .yaml
- instance: SessionData = cls.from_yaml(file_path=session_path) # type: ignore
+ instance: SessionData = cls.from_yaml(file_path=session_data_path) # type: ignore

  # The method assumes that the 'donor' .yaml file is always stored inside the raw_data directory of the session
  # to be processed. Since the directory itself might have moved (between or even within the same PC) relative to
  # where it was when the SessionData snapshot was generated, reconfigures the paths to all raw_data files using
  # the root from above.
- instance.raw_data.switch_root(new_root=session_path)
+ instance.raw_data.switch_root(new_root=session_path.parents[2])

  # Resolves the paths to the processed_data directories. The resolution strategy depends on whether the method is
  # called on the VRPC (locally) or the BioHPC server (remotely).
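The switch to session_path.parents[2] follows directly from the layout documented in the Args section (vrpc_root/project/animal/session): parents[0] is the animal directory, parents[1] the project directory, and parents[2] the storage root that switch_root() expects. A hedged usage sketch follows; the import path and the concrete paths are illustrative only.

    from pathlib import Path

    from sl_shared_assets.data_classes import SessionData  # import path assumed

    session_path = Path("/vrpc_root/my_project/A001/2026-01-15-10-30-00-000001")
    # Three levels up from the session directory is the storage root.
    assert session_path.parents[2] == Path("/vrpc_root")

    # on_server=False: VRPC-style runtime, raw_data and processed_data share one root.
    session = SessionData.load(session_path=session_path, on_server=False)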
@@ -1007,7 +1036,7 @@ class SessionData(YamlConfig):
  # Local runtimes use the same root session directory for both raw_data and processed_data. This stems from
  # the assumption that most local machines in the lab only use NVME (fast) volumes and, therefore, do not
  # need to separate 'storage' and 'working' data.
- instance.processed_data.switch_root(new_root=session_path)
+ instance.processed_data.switch_root(new_root=session_path.parents[2])

  else:
  # The BioHPC server stores raw_data on slow volume and processed_data on fast (NVME) volume. Therefore, to
@@ -1025,12 +1054,12 @@ class SessionData(YamlConfig):
  # Returns the initialized SessionData instance to caller
  return instance

- def _to_path(self) -> None:
+ def _save(self) -> None:
  """Saves the instance data to the 'raw_data' directory of the managed session as a 'session_data.yaml' file.

  This is used to save the data stored in the instance to disk, so that it can be reused during preprocessing or
  data processing. The method is intended to only be used by the SessionData instance itself during its
- create_session() method runtime.
+ create() method runtime.
  """

  # Copies instance data to prevent it from being modified by reference when executing the steps below
@@ -101,6 +101,7 @@ def calculate_directory_checksum(
  process_file = partial(_calculate_file_checksum, directory)

  # Submits all tasks to be executed in parallel
+ # noinspection PyTypeChecker
  future_to_path = {executor.submit(process_file, file): file for file in files}

  # Collects results as they complete
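The hunk above only adds a PyCharm inspection suppression, but the surrounding submit/partial pattern is worth spelling out: each file is dispatched to a worker process, and the futures are mapped back to their source paths. A self-contained sketch of the same pattern follows; the MD5 digest, file discovery, and return shape are illustrative assumptions rather than the package's actual implementation.

    import hashlib
    from concurrent.futures import ProcessPoolExecutor, as_completed
    from functools import partial
    from pathlib import Path


    def _calculate_file_checksum(directory: Path, file: Path) -> tuple[str, str]:
        # Returns the file's directory-relative path together with its digest.
        digest = hashlib.md5(file.read_bytes()).hexdigest()
        return str(file.relative_to(directory)), digest


    def calculate_directory_checksum(directory: Path) -> dict[str, str]:
        files = sorted(path for path in directory.rglob("*") if path.is_file())
        # Freezes the directory argument so each worker only receives the file path.
        process_file = partial(_calculate_file_checksum, directory)
        with ProcessPoolExecutor() as executor:
            # Submits all tasks to be executed in parallel, keyed by source file.
            future_to_path = {executor.submit(process_file, file): file for file in files}
            # Collects per-file results as they complete.
            return dict(future.result() for future in as_completed(future_to_path))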
@@ -52,10 +52,10 @@ class Main:
  """Determines the number of frames to process, if greater than zero. If negative (-1), the suite2p is configured
  to process all available frames."""

- multiplane_parallel: bool = False
- """Determines whether to parallelize plane processing for multiplane data. This requires a properly configured
- server to parallelize the computations and will not work on the local machine. Due to how suite2p is used in the
- lab, this has to always be set to False."""
+ multiplane_parallel: bool = True
+ """Determines whether to parallelize plane processing for multiplane data. Assuming that this configuration class
+ is used together with the Sun lab-optimized suite2p, it is recommended to keep this set to True for most
+ runtimes."""

  ignore_flyback: list[int] = field(default_factory=list)
  """The list of plane indices to ignore as flyback planes that typically contain no valid imaging data."""
@@ -162,9 +162,11 @@ class Registration:
  nimg_init: int = 500
  """The number of frames to use to compute the reference image for registration."""

- batch_size: int = 1000
- """The number of frames to register simultaneously in each batch. This depends on memory constraints. It is faster
- to run the registration if the batch is larger, but it requires more RAM."""
+ batch_size: int = 100
+ """The number of frames to register simultaneously in each batch. When processing data on fast (NVME) drives,
+ increasing this parameter has minimal benefits and results in undue RAM overhead. Therefore, on fast drives,
+ keep this number low. On slow drives, increasing this number may result in faster runtime, at the expense of
+ increased RAM use."""

  maxregshift: float = 0.1
  """The maximum allowed shift during registration, given as a fraction of the frame size, in pixels
@@ -444,6 +446,11 @@ class Suite2PConfiguration(YamlConfig):
  for section_name, section in asdict(self).items():
  # Adds all keys and values from each section to the combined dictionary
  if isinstance(section, dict):
+ # Since some keys in the original suite2p configuration file use 'unconventional' names, we opted to use
+ # conventional names in our configuration file. To make the 'ops' version of this file fully compatible
+ # with suite2p, we need to translate all such modified keys back to the names expected by suite2p.
+ if "one_p_reg" in section.keys():
+ section["1Preg"] = section.pop("one_p_reg")
  combined_ops.update(section)

  return combined_ops
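The rename is necessary because '1Preg' begins with a digit and therefore cannot serve as a Python dataclass field name, while the YAML-friendly one_p_reg can. A minimal standalone demonstration of the translation step (the field values are made up):

    section = {"one_p_reg": False, "smooth_sigma": 1.15}
    if "one_p_reg" in section.keys():
        # Restores the key name that suite2p expects in its 'ops' dictionary.
        section["1Preg"] = section.pop("one_p_reg")
    assert section == {"smooth_sigma": 1.15, "1Preg": False}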