sl-shared-assets 1.0.0rc15__py3-none-any.whl → 1.0.0rc17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic. Click here for more details.

@@ -528,13 +528,13 @@ class ConfigurationData:
528
528
  project_configuration_path: Path = Path()
529
529
  """Stores the path to the project_configuration.yaml file. This file contains the snapshot of the configuration
530
530
  parameters for the session's project."""
531
- suite2p_configuration_path: Path = Path()
532
- """Stores the path to the suite2p_configuration.yaml file stored inside the project's 'configuration' directory on
533
- the fast BioHPC server volume. This configuration file specifies the parameters for the 'single day' suite2p
534
- registration pipeline, which is applied to each session that generates brain activity data."""
535
- multiday_configuration_path: Path = Path()
536
- """Stores the path to the multiday_configuration.yaml file stored inside the project's 'configuration' directory
537
- on the fast BioHPC server volume. This configuration file specifies the parameters for the 'multiday'
531
+ single_day_s2p_configuration_path: Path = Path()
532
+ """Stores the path to the single_day_s2p_configuration.yaml file stored inside the project's 'configuration'
533
+ directory on the fast BioHPC server volume. This configuration file specifies the parameters for the 'single day'
534
+ suite2p registration pipeline, which is applied to each session that generates brain activity data."""
535
+ multi_day_s2p_configuration_path: Path = Path()
536
+ """Stores the path to the multi_day_s2p_configuration.yaml file stored inside the project's 'configuration'
537
+ directory on the fast BioHPC server volume. This configuration file specifies the parameters for the 'multiday'
538
538
  sl-suite2p-based registration pipelines used to track brain cells across multiple sessions."""
539
539
 
540
540
  def resolve_paths(self, root_directory_path: Path, experiment_name: str | None = None) -> None:
@@ -559,8 +559,8 @@ class ConfigurationData:
559
559
  else:
560
560
  self.experiment_configuration_path = self.configuration_path.joinpath(f"{experiment_name}.yaml")
561
561
  self.project_configuration_path = self.configuration_path.joinpath("project_configuration.yaml")
562
- self.suite2p_configuration_path = self.configuration_path.joinpath("suite2p_configuration.yaml")
563
- self.multiday_configuration_path = self.configuration_path.joinpath("multiday_configuration.yaml")
562
+ self.single_day_s2p_configuration_path = self.configuration_path.joinpath("single_day_s2p_configuration.yaml")
563
+ self.multi_day_s2p_configuration_path = self.configuration_path.joinpath("multi_day_s2p_configuration.yaml")
564
564
 
565
565
  def make_directories(self) -> None:
566
566
  """Ensures that all major subdirectories and the root directory exist."""
@@ -1162,6 +1162,7 @@ class SessionData(YamlConfig):
1162
1162
  # RAW DATA
1163
1163
  new_root = local_root.joinpath(instance.project_name, instance.animal_id, instance.session_name, "raw_data")
1164
1164
  instance.raw_data.resolve_paths(root_directory_path=new_root)
1165
+ instance.raw_data.make_directories()
1165
1166
 
1166
1167
  # Uses the adjusted raw_data section to load the ProjectConfiguration instance. This is used below to resolve
1167
1168
  # all other SessionData sections, as it stores various required root directories.
@@ -1178,10 +1179,12 @@ class SessionData(YamlConfig):
1178
1179
  root_directory_path=new_root,
1179
1180
  experiment_name=instance.experiment_name,
1180
1181
  )
1182
+ instance.configuration_data.make_directories()
1181
1183
 
1182
1184
  # DEEPLABCUT
1183
1185
  new_root = local_root.joinpath(instance.project_name, "deeplabcut")
1184
1186
  instance.deeplabcut_data.resolve_paths(root_directory_path=new_root)
1187
+ instance.deeplabcut_data.make_directories()
1185
1188
 
1186
1189
  # Resolves the roots for all VRPC-specific sections that use the data from the ProjectConfiguration instance:
1187
1190
 
@@ -1239,6 +1242,7 @@ class SessionData(YamlConfig):
1239
1242
  instance.project_name, instance.animal_id, instance.session_name, "processed_data"
1240
1243
  )
1241
1244
  )
1245
+ instance.processed_data.make_directories()
1242
1246
 
1243
1247
  # Ensures that project configuration and session data classes are present in both raw_data and processed_data
1244
1248
  # directories. This ensures that all data of the session can always be traced to the parent project, animal,
@@ -1270,6 +1274,20 @@ class SessionData(YamlConfig):
1270
1274
  create() method runtime.
1271
1275
  """
1272
1276
 
1277
+ origin = copy.deepcopy(self)
1278
+
1279
+ # Resets all path fields to null. These fields are not loaded from disk when the instance is loaded, so setting
1280
+ # them to null has no negative consequences. Conversely, keeping these fields with Path objects prevents the
1281
+ # SessionData instance from being loaded from disk.
1282
+ origin.raw_data = None # type: ignore
1283
+ origin.processed_data = None # type: ignore
1284
+ origin.configuration_data = None # type: ignore
1285
+ origin.deeplabcut_data = None # type: ignore
1286
+ origin.vrpc_persistent_data = None # type: ignore
1287
+ origin.scanimagepc_persistent_data = None # type: ignore
1288
+ origin.mesoscope_data = None # type: ignore
1289
+ origin.destinations = None # type: ignore
1290
+
1273
1291
  # Saves instance data as a .YAML file
1274
1292
  self.to_yaml(file_path=self.raw_data.session_data_path)
1275
1293
  self.to_yaml(file_path=self.processed_data.session_data_path)
@@ -4,6 +4,9 @@ extends the original suite2p code to support tracking the same objects (cells) a
4
4
 
5
5
  from typing import Any
6
6
  from dataclasses import field, asdict, dataclass
7
+ from pathlib import Path
8
+ import numpy as np
9
+ from ataraxis_base_utilities import ensure_directory_exists
7
10
 
8
11
  from ataraxis_data_structures import YamlConfig
9
12
 
@@ -12,54 +15,81 @@ from ataraxis_data_structures import YamlConfig
12
15
  class IO:
13
16
  """Stores parameters that control data input and output during various stages of the pipeline."""
14
17
 
15
- sessions: list[str] = field(default_factory=list)
18
+ session_ids: list[str] = field(default_factory=list)
19
+ """Stores the list of session IDs to register across days. This field should have the same length and order as the
20
+ session_folders list. Primarily, session IDs are used in terminal printouts to identify processed sessions to human
21
+ operators."""
22
+
23
+ session_folders: list[str] = field(default_factory=list)
16
24
  """Specifies the list of sessions to register across days, as absolute paths to their /suite2p directories
17
- e.g: root/project/animal/session/processed_data/suite2p. The suite2p directory is created as part of the
18
- 'single-day' suite2p runtime, assuming the default value of the 'save_folder' SingleDayS2PConfiguration class
25
+ e.g: root/session/processed_data/mesoscope_data/suite2p. The suite2p directory is created as part of the
26
+ single-session suite2p processing, assuming the default value of the 'save_folder' SingleDayS2PConfiguration class
19
27
  attribute was not modified. Note, each suite2p directory has to contain the 'combined' plane folder, which is
20
- created if the 'combined' SingleDayS2PConfiguration class attribute is 'True'."""
28
+ created if the 'combined' SingleDayS2PConfiguration attribute is 'True'."""
29
+
30
+
31
+ @dataclass()
32
+ class Hardware:
33
+ """Stores parameters that control how the suite2p interacts with the hardware of the host-computer to accelerate
34
+ processing speed."""
35
+
36
+ parallelize_registration: bool = True
37
+ """Determines whether to parallelize certain multi-day registration pipeline steps. Running these steps in parallel
38
+ results in faster overall processing, but increases the RAM usage. Since multi-day processing does not automatically
39
+ parallelize operations to all cores, it is generally safe and recommended to always enable this option."""
40
+
41
+ registration_workers: int = -1
42
+ """The number of parallel workers (cores) to use when parallelizing multi-day registration. Setting this to a
43
+ negative value uses all available cores. Setting this to zero or one disables parallelization."""
44
+
45
+ parallelize_extraction: bool = False
46
+ """Determines whether to extract multi-day cell fluorescence from multiple sessions at the same time. Note,
47
+ fluorescence extraction already contains automatic parallelization and will use all available cores to a certain
48
+ extent. Extracting data for multiple sessions at the same time is still faster due to a more efficient core
49
+ utilization, but typically does not scale well (peaks for 2-3 parallel sessions) and majorly increases the RAM
50
+ usage.
51
+ """
21
52
 
22
- mesoscan: bool = True
23
- """Indicates whether the processed session /suite2p folders contain registered Mesoscope frames."""
53
+ parallel_sessions: int = 3
54
+ """The number of sessions to process in-parallel when extracting multi-day fluorescence data. Since this
55
+ parallelization works on top of existing suite2p numba-parallelization, it will use all available cores regardless
56
+ of the number of parallelized sessions. Instead, this parameter can be tuned to control the total RAM usage and
57
+ the extent of overall core utilization. Setting this to a value at or below one will disable session
58
+ parallelization."""
24
59
 
25
60
 
26
61
  @dataclass()
27
62
  class CellDetection:
28
63
  """Stores parameters for selecting single-day-registered cells (ROIs) to be tracked across multiple sessions (days).
29
-
30
- To maximize the tracking pipeline reliability, it is beneficial to pre-filter the cells whose identity (as cells)
31
- is not certain or that may be hard to track across sessions.
32
64
  """
33
65
 
34
66
  probability_threshold: float = 0.85
35
- """The minimum required probability score assigned to the cell (ROI) by the suite2p classifier. Cells with a lower
36
- classifier score are excluded from processing."""
67
+ """The minimum required probability score assigned to the cell (ROI) by the single-day suite2p classifier. Cells
68
+ with a lower classifier score are excluded from multi-day processing."""
37
69
 
38
70
  maximum_size: int = 1000
39
71
  """The maximum allowed cell (ROI) size, in pixels. Cells with a larger pixel size are excluded from processing."""
40
72
 
41
73
  mesoscope_stripe_borders: list[int] = field(default_factory=list)
42
- """Stores the x-coordinates of mesoscope combined image stripe (ROI) borders. For mesoscope images, 'stripes' are
43
- the individual imaging ROIs acquired in the 'multiple-ROI' mode. If this field is not overwritten by the user, the
44
- pipeline will read the border data from the combined plane 'ops.npy' file generated by single-day suite2p pipeline.
74
+ """Stores the x-coordinates of combined mesoscope image stripe (ROI) borders. For mesoscope images, 'stripes' are
75
+ the individual imaging ROIs acquired in the 'multiple-ROI' mode. Keep this field set to an empty list to skip
76
+ stripe border-filtering or when working with non-mesoscope images.
45
77
  """
46
78
 
47
79
  stripe_margin: int = 30
48
80
  """The minimum required distance, in pixels, between the center-point (the median x-coordinate) of the cell (ROI)
49
81
  and the mesoscope stripe border. Cells that are too close to stripe borders are excluded from processing to avoid
50
- ambiguities associated with tracking cells that span multiple stripes."""
82
+ ambiguities associated with tracking cells that span multiple stripes. This parameter is only used if
83
+ 'mesoscope_stripe_borders' field is not set to an empty list."""
51
84
 
52
85
 
53
86
  @dataclass()
54
87
  class Registration:
55
- """Stores parameters for aligning (registering) the sessions from multiple days to the same visual space.
56
-
57
- Registration is used to create a 'shared' visual space, allowing to track the same cells (ROIs) across otherwise
58
- variable visual space of each session.
88
+ """Stores parameters for aligning (registering) the sessions from multiple days to the same visual (sampling) space.
59
89
  """
60
90
 
61
91
  image_type: str = "enhanced"
62
- """The type of single-day suite2p-generated image to use for across-day registration. Supported options are
92
+ """The type of suite2p-generated reference image to use for across-day registration. Supported options are
63
93
  'enhanced', 'mean' and 'max'. This 'template' image is used to calculate the necessary deformation (transformations)
64
94
  to register (align) all sessions to the same visual space."""
65
95
 
@@ -75,18 +105,12 @@ class Registration:
75
105
 
76
106
  speed_factor: float = 3
77
107
  """The relative force of the deformation transform applied when registering the sessions to the same visual space.
78
- This is the most important parameter to tune."""
108
+ This is the most important parameter to tune. For most cases, a value between 1 and 5 is reasonable."""
79
109
 
80
110
 
81
111
  @dataclass()
82
112
  class Clustering:
83
- """Stores parameters for clustering cell (ROI) masks across multiple registered sessions.
84
-
85
- Clustering is used to track cells across sessions. If a group of ROIs across sessions is clustered together, it
86
- is likely that they represent the same cell (ROI) across all sessions. This process involves first creating a
87
- 'template' mask that tracks a cell using the registered (deformed) visual space and then using this template to
88
- track the cell in the original (non-deformed) visual space of each session.
89
- """
113
+ """Stores parameters for tracking (clustering) cell (ROI) masks across multiple registered sessions (days)."""
90
114
 
91
115
  criterion: str = "distance"
92
116
  """Specifies the criterion for clustering (grouping) cell (ROI) masks from different sessions. Currently, the only
@@ -98,23 +122,25 @@ class Clustering:
98
122
 
99
123
  mask_prevalence: int = 50
100
124
  """Specifies the minimum percentage of all registered sessions that must include the clustered cell mask. Cell masks
101
- present in fewer percent of sessions than this value are excluded from processing. This parameter is used to isolate
102
- the cells that are present (active) across sessions."""
125
+ present in fewer percent of sessions than this value are excluded from processing. This parameter is used to filter
126
+ out cells that are mostly silent or not distinguishable across sessions."""
103
127
 
104
128
  pixel_prevalence: int = 50
105
- """Specifies the minimum percentage of all registered sessions in which a pixel from a given cell mask must be
106
- present for it to be used to construct the template mask. Pixels present in fewer percent of sessions than this
107
- value are not used to define the 'template' mask coordinates. Template masks are used to extract the cell
108
- fluorescence from the 'original' visual space of every session. This parameter is used to isolate the part of the
109
- cell that is stable across sessions."""
129
+ """Specifies the minimum percentage of all registered sessions in which a cell mask pixel must be present for it to
130
+ be used to construct the template mask. Pixels present in fewer percent of sessions than this value are not used to
131
+ define the template masks. Template masks are used to extract the cell fluorescence from the original (non-deformed)
132
+ visual space of every session. This parameter is used to isolate the part of the cell that is stable across
133
+ sessions, which is required for the extraction step to work correctly (target only the tracked cell)."""
110
134
 
111
135
  step_sizes: list[int] = field(default_factory=lambda: [200, 200])
112
- """Specifies the block size for the clustering process, in pixels. Clustering is applied in blocks of this size,
113
- sampled across the processed plane image, to reduce the memory (RAM) overhead."""
136
+ """Specifies the block size for the cell clustering (across-session tracking) process, in pixels, in the order of
137
+ (height, width). To reduce the memory (RAM) overhead, the algorithm divides the deformed (shared) visual space into
138
+ blocks and then processes one (or more) blocks at a time."""
114
139
 
115
140
  bin_size: int = 50
116
141
  """Specifies the size of bins used to discover cell masks within blocks during clustering. To avoid edge cases, the
117
- algorithm clusters the cell masks within the region defined by the center-point of each cell +- bin_size."""
142
+ algorithm clusters the cell masks within the region defined by the center-point of each cell +- bin_size. This works
143
+ on top of pre-sorting cells into spatial blocks defined by 'step_sizes'."""
118
144
 
119
145
  maximum_distance: int = 20
120
146
  """Specifies the maximum distance, in pixels, that can separate masks across multiple sessions. The clustering
@@ -122,72 +148,78 @@ class Clustering:
122
148
  cells during tracking."""
123
149
 
124
150
  minimum_size: int = 25
125
- """The minimum size of the non-overlapping (with other cells) cell (ROI) region, in pixels, that has to be covered
126
- by the template mask, for the cell to be assigned to that template. This is used to determine which template(s) the
127
- cell belongs to (if any), for the purpose of tracking it across sessions."""
151
+ """The minimum size of the non-overlapping cell (ROI) region, in pixels, that has to be covered by the template
152
+ mask, for the cell to be assigned to that template. This is used to determine which template(s) the cell belongs to
153
+ (if any), for the purpose of tracking it across sessions."""
128
154
 
129
155
 
130
156
  @dataclass()
131
- class Demix:
132
- """Stores settings used to deconvolve fluorescence signals from cells tracked across multiple days.
157
+ class MultiDayS2PConfiguration(YamlConfig):
158
+ """Aggregates all parameters for the multi-day suite2p pipeline used to track cells across multiple days
159
+ (sessions) and extract their activity.
133
160
 
134
- This step applies the suite2p spike deconvolution algorithm to the cell masks isolated during clustering to extract
135
- the fluorescence of the cells tracked across multiple sessions (days). Generally, it should use the same parameters
136
- as were used by the single-day suite2p pipeline.
161
+ These settings are used to configure the multi-day suite2p extraction pipeline, which is based on the reference
162
+ implementation here: https://github.com/sprustonlab/multiday-suite2p-public. This class behaves similar to the
163
+ SingleDayS2PConfiguration class. It can be saved and loaded from a .YAML file and translated to dictionary or
164
+ ops.npy format, expected by the multi-day sl-suite2p pipeline.
137
165
  """
138
166
 
139
- baseline: str = "maximin"
140
- """Specifies the method to compute the baseline of each trace. This baseline is then subtracted from each cell.
141
- ‘maximin’ computes a moving baseline by filtering the data with a Gaussian of width 'sig_baseline' * 'fs', and then
142
- minimum filtering with a window of 'win_baseline' * 'fs', and then maximum filtering with the same window.
143
- ‘constant’ computes a constant baseline by filtering with a Gaussian of width 'sig_baseline' * 'fs' and then taking
144
- the minimum value of this filtered trace. ‘constant_percentile’ computes a constant baseline by taking the
145
- 'prctile_baseline' percentile of the trace."""
167
+ io: IO = field(default_factory=IO)
168
+ """Stores parameters that control data input and output during various stages of the pipeline."""
169
+ hardware: Hardware = field(default_factory=Hardware)
170
+ """Stores parameters that control how the suite2p interacts with the hardware of the host-computer to accelerate
171
+ processing speed."""
172
+ cell_detection: CellDetection = field(default_factory=CellDetection)
173
+ """Stores parameters for selecting single-day-registered cells (ROIs) to be tracked across multiple sessions (days).
174
+ """
175
+ registration: Registration = field(default_factory=Registration)
176
+ """Stores parameters for aligning (registering) the sessions from multiple days to the same visual (sampling) space.
177
+ """
178
+ clustering: Clustering = field(default_factory=Clustering)
179
+ """Stores parameters for tracking (clustering) cell (ROI) masks across multiple registered sessions (days)."""
146
180
 
147
- win_baseline: float = 60.0
148
- """The time window, in seconds, over which to compute the baseline filter."""
181
+ def to_npy(self, output_directory: Path) -> None:
182
+ """Saves the managed configuration data as an 'ops.npy' file under the target directory.
149
183
 
150
- sig_baseline: float = 10.0
151
- """The standard deviation, in seconds, of the Gaussian filter applied to smooth the baseline signal."""
184
+ This method is mostly called by internal sl-suite2p functions to translate the user-specified configuration
185
+ file into the format used by suite2p pipelines.
152
186
 
153
- l2_reg: float = 0.1
154
- """The L2 regularization strength applied during spike deconvolution."""
187
+ Notes:
188
+ If the target output directory does not exist when this method is called, it will be created.
155
189
 
156
- neucoeff: float = 0.7
157
- """The neuropil coefficient applied for signal correction before deconvolution."""
190
+ Args:
191
+ output_directory: The path to the directory where the 'ops.npy' file should be saved.
192
+ """
193
+ ensure_directory_exists(output_directory) # Creates the directory, if necessary
194
+ file_path = output_directory.joinpath("ops.npy") # Computes the output path
195
+ np.save(file_path, self.to_ops(), allow_pickle=True) # Dumps the configuration data to 'ops.npy' file.
158
196
 
197
+ def to_config(self, output_directory: Path) -> None:
198
+ """Saves the managed configuration data as a 'multi_day_s2p_configuration.yaml' file under the target
199
+ directory.
159
200
 
160
- @dataclass()
161
- class MultiDayS2PConfiguration(YamlConfig):
162
- """Aggregates all parameters for the multi-day suite2p pipeline used to track cells across multiple days
163
- (sessions) and extract their activity.
201
+ This method is typically used to dump the 'default' configuration parameters to disk as a user-editable
202
+ .yaml file. The user is then expected to modify these parameters as needed, before the class data is loaded and
203
+ used by the suite2p pipeline.
164
204
 
165
- These settings are used to configure the multiday suite2p extraction pipeline, which is based on the reference
166
- implementation here: https://github.com/sprustonlab/multiday-suite2p-public. This class behaves similar to the
167
- SingleDayS2PConfiguration class. It can be saved and loaded from a .YAML file and translated to dictionary format,
168
- expected by the multi-day sl-suite2p pipeline.
169
- """
205
+ Notes:
206
+ If the target output directory does not exist when this method is called, it will be created.
170
207
 
171
- cell_detection: CellDetection = field(default_factory=CellDetection)
172
- """Stores parameters for selecting single-day-registered cells (ROIs) to be tracked across multiple sessions
173
- (days)."""
174
- registration: Registration = field(default_factory=Registration)
175
- """Stores parameters for aligning (registering) the sessions from multiple days to the same visual space."""
176
- clustering: Clustering = field(default_factory=Clustering)
177
- """Stores parameters for clustering (tracking) cell (ROI) masks across multiple registered sessions."""
178
- demix: Demix = field(default_factory=Demix)
179
- """Stores settings used to deconvolve fluorescence signals from cells tracked across multiple days."""
180
- io: IO = field(default_factory=IO)
181
- """Stores parameters that control data input and output during various stages of the pipeline."""
208
+ Args:
209
+ output_directory: The path to the directory where the 'multi_day_s2p_configuration.yaml' file should be
210
+ saved.
211
+ """
212
+ ensure_directory_exists(output_directory) # Creates the directory, if necessary
213
+ file_path = output_directory.joinpath("multi_day_s2p_configuration.yaml") # Computes the output path
214
+
215
+ # Note, this uses the same configuration name as the SessionData class, making it automatically compatible with
216
+ # Sun lab data structure.
217
+ self.to_yaml(file_path=file_path) # Dumps the data to a 'yaml' file.
182
218
 
183
219
  def to_ops(self) -> dict[str, Any]:
184
220
  """Converts the class instance to a dictionary and returns it to caller.
185
221
 
186
- This dictionary can be passed to sl-suite2p multi-day functions as the 'ops' argument.
187
-
188
- Notes:
189
- Unlike the single-day configuration class, the dictionary generated by this method uses section names as
190
- top level keys and parameter names as second-level keys. This mimics the original multiday-pipeline
191
- configuration scheme.
222
+ This method is mostly called by internal sl-suite2p functions to translate the default configuration parameters
223
+ to the dictionary format used by suite2p pipelines.
192
224
  """
193
225
  return asdict(self)