sl-shared-assets 1.0.0rc20__py3-none-any.whl → 1.0.0rc22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sl-shared-assets might be problematic. Click here for more details.
- sl_shared_assets/__init__.py +27 -27
- sl_shared_assets/__init__.pyi +24 -22
- sl_shared_assets/cli.py +266 -40
- sl_shared_assets/cli.pyi +73 -14
- sl_shared_assets/data_classes/__init__.py +23 -20
- sl_shared_assets/data_classes/__init__.pyi +18 -18
- sl_shared_assets/data_classes/configuration_data.py +408 -26
- sl_shared_assets/data_classes/configuration_data.pyi +173 -15
- sl_shared_assets/data_classes/runtime_data.py +49 -43
- sl_shared_assets/data_classes/runtime_data.pyi +37 -40
- sl_shared_assets/data_classes/session_data.py +168 -914
- sl_shared_assets/data_classes/session_data.pyi +55 -350
- sl_shared_assets/data_classes/surgery_data.py +3 -3
- sl_shared_assets/data_classes/surgery_data.pyi +2 -2
- sl_shared_assets/tools/__init__.py +8 -1
- sl_shared_assets/tools/__init__.pyi +11 -1
- sl_shared_assets/tools/ascension_tools.py +27 -26
- sl_shared_assets/tools/ascension_tools.pyi +5 -5
- sl_shared_assets/tools/packaging_tools.py +14 -1
- sl_shared_assets/tools/packaging_tools.pyi +4 -0
- sl_shared_assets/tools/project_management_tools.py +164 -0
- sl_shared_assets/tools/project_management_tools.pyi +48 -0
- {sl_shared_assets-1.0.0rc20.dist-info → sl_shared_assets-1.0.0rc22.dist-info}/METADATA +21 -4
- sl_shared_assets-1.0.0rc22.dist-info/RECORD +36 -0
- sl_shared_assets-1.0.0rc22.dist-info/entry_points.txt +8 -0
- sl_shared_assets/suite2p/__init__.py +0 -8
- sl_shared_assets/suite2p/__init__.pyi +0 -4
- sl_shared_assets/suite2p/multi_day.py +0 -224
- sl_shared_assets/suite2p/multi_day.pyi +0 -104
- sl_shared_assets/suite2p/single_day.py +0 -564
- sl_shared_assets/suite2p/single_day.pyi +0 -220
- sl_shared_assets-1.0.0rc20.dist-info/RECORD +0 -40
- sl_shared_assets-1.0.0rc20.dist-info/entry_points.txt +0 -4
- {sl_shared_assets-1.0.0rc20.dist-info → sl_shared_assets-1.0.0rc22.dist-info}/WHEEL +0 -0
- {sl_shared_assets-1.0.0rc20.dist-info → sl_shared_assets-1.0.0rc22.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,224 +0,0 @@
|
|
|
1
|
-
"""This module stores the classes used to configure the multi-day (across-session) sl-suite2p pipeline. This pipeline
|
|
2
|
-
extends the original suite2p code to support tracking the same objects (cells) across multiple days. Both single-day
|
|
3
|
-
(original) and multi-day (extended) pipelines are available as part of the Sun lab maintained sl-suite2p package."""
|
|
4
|
-
|
|
5
|
-
from typing import Any
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
from dataclasses import field, asdict, dataclass
|
|
8
|
-
|
|
9
|
-
import numpy as np
|
|
10
|
-
from ataraxis_base_utilities import ensure_directory_exists
|
|
11
|
-
from ataraxis_data_structures import YamlConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass()
|
|
15
|
-
class IO:
|
|
16
|
-
"""Stores parameters that control data input and output during various stages of the pipeline."""
|
|
17
|
-
|
|
18
|
-
session_ids: list[str] = field(default_factory=list)
|
|
19
|
-
"""Stores the list of session IDs to register across days. This field should have the same length and order as the
|
|
20
|
-
session_folders list. Primarily, session IDs are used in terminal printouts to identify processed sessions to human
|
|
21
|
-
operators."""
|
|
22
|
-
|
|
23
|
-
session_folders: list[str] = field(default_factory=list)
|
|
24
|
-
"""Specifies the list of sessions to register across days, as absolute paths to their /suite2p directories
|
|
25
|
-
e.g: root/session/processed_data/mesoscope_data/suite2p. The suite2p directory is created as part of the
|
|
26
|
-
single-session suite2p processing, assuming the default value of the 'save_folder' SingleDayS2PConfiguration class
|
|
27
|
-
attribute was not modified. Note, each suite2p directory has to contain the 'combined' plane folder, which is
|
|
28
|
-
created if the 'combined' SingleDayS2PConfiguration attribute is 'True'."""
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
@dataclass()
|
|
32
|
-
class Hardware:
|
|
33
|
-
"""Stores parameters that control how the suite2p interacts with the hardware of the host-computer to accelerate
|
|
34
|
-
processing speed."""
|
|
35
|
-
|
|
36
|
-
parallelize_registration: bool = True
|
|
37
|
-
"""Determines whether to parallelize certain multi-day registration pipeline steps. Running these steps in parallel
|
|
38
|
-
results in faster overall processing, but increases the RAM usage. Since multi-day processing does not automatically
|
|
39
|
-
parallelize operations to all cores, it is generally safe and recommended to always enable this option."""
|
|
40
|
-
|
|
41
|
-
registration_workers: int = -1
|
|
42
|
-
"""The number of parallel workers (cores) to use when parallelizing multi-day registration. Setting this to a
|
|
43
|
-
negative value uses all available cores. Setting this to zero or one disables parallelization."""
|
|
44
|
-
|
|
45
|
-
parallelize_extraction: bool = False
|
|
46
|
-
"""Determines whether to extract multi-day cell fluorescence from multiple sessions at the same time. Note,
|
|
47
|
-
fluorescence extraction already contains automatic parallelization and will use all available cores to a certain
|
|
48
|
-
extent. Extracting data for multiple sessions at the same time is still faster due to a more efficient core
|
|
49
|
-
utilization, but typically does not scale well (peaks for 2-3 parallel sessions) and majorly increases the RAM
|
|
50
|
-
usage.
|
|
51
|
-
"""
|
|
52
|
-
|
|
53
|
-
parallel_sessions: int = 3
|
|
54
|
-
"""The number of sessions to process in-parallel when extracting multi-day fluorescence data. Since this
|
|
55
|
-
parallelization works on top of existing suite2p numba-parallelization, it will use all available cores regardless
|
|
56
|
-
of the number of parallelized sessions. Instead, this parameter can be tuned to control the total RAM usage and
|
|
57
|
-
the extent of overall core utilization. Setting this to a value at or below one will disable session
|
|
58
|
-
parallelization."""
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
@dataclass()
|
|
62
|
-
class CellDetection:
|
|
63
|
-
"""Stores parameters for selecting single-day-registered cells (ROIs) to be tracked across multiple sessions (days)."""
|
|
64
|
-
|
|
65
|
-
probability_threshold: float = 0.85
|
|
66
|
-
"""The minimum required probability score assigned to the cell (ROI) by the single-day suite2p classifier. Cells
|
|
67
|
-
with a lower classifier score are excluded from multi-day processing."""
|
|
68
|
-
|
|
69
|
-
maximum_size: int = 1000
|
|
70
|
-
"""The maximum allowed cell (ROI) size, in pixels. Cells with a larger pixel size are excluded from processing."""
|
|
71
|
-
|
|
72
|
-
mesoscope_stripe_borders: list[int] = field(default_factory=list)
|
|
73
|
-
"""Stores the x-coordinates of combined mesoscope image stripe (ROI) borders. For mesoscope images, 'stripes' are
|
|
74
|
-
the individual imaging ROIs acquired in the 'multiple-ROI' mode. Keep this field set to an empty list to skip
|
|
75
|
-
stripe border-filtering or when working with non-mesoscope images.
|
|
76
|
-
"""
|
|
77
|
-
|
|
78
|
-
stripe_margin: int = 30
|
|
79
|
-
"""The minimum required distance, in pixels, between the center-point (the median x-coordinate) of the cell (ROI)
|
|
80
|
-
and the mesoscope stripe border. Cells that are too close to stripe borders are excluded from processing to avoid
|
|
81
|
-
ambiguities associated with tracking cells that span multiple stripes. This parameter is only used if
|
|
82
|
-
'mesoscope_stripe_borders' field is not set to an empty list."""
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
@dataclass()
|
|
86
|
-
class Registration:
|
|
87
|
-
"""Stores parameters for aligning (registering) the sessions from multiple days to the same visual (sampling) space."""
|
|
88
|
-
|
|
89
|
-
image_type: str = "enhanced"
|
|
90
|
-
"""The type of suite2p-generated reference image to use for across-day registration. Supported options are
|
|
91
|
-
'enhanced', 'mean' and 'max'. This 'template' image is used to calculate the necessary deformation (transformations)
|
|
92
|
-
to register (align) all sessions to the same visual space."""
|
|
93
|
-
|
|
94
|
-
grid_sampling_factor: float = 1
|
|
95
|
-
"""Determines to what extent the grid sampling scales with the deformed image scale. Has to be between 0 and 1. By
|
|
96
|
-
making this value lower than 1, the grid is relatively fine at the higher scales, allowing for more
|
|
97
|
-
deformations. This is used when resizing session images as part of the registration process."""
|
|
98
|
-
|
|
99
|
-
scale_sampling: int = 30
|
|
100
|
-
"""The number of iterations for each level (i.e. between each factor two in scale) to perform when computing the
|
|
101
|
-
deformations. Values between 20 and 30 are reasonable in most situations, but higher values yield better results in
|
|
102
|
-
general. The speed of the algorithm scales linearly with this value."""
|
|
103
|
-
|
|
104
|
-
speed_factor: float = 3
|
|
105
|
-
"""The relative force of the deformation transform applied when registering the sessions to the same visual space.
|
|
106
|
-
This is the most important parameter to tune. For most cases, a value between 1 and 5 is reasonable."""
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
@dataclass()
|
|
110
|
-
class Clustering:
|
|
111
|
-
"""Stores parameters for tracking (clustering) cell (ROI) masks across multiple registered sessions (days)."""
|
|
112
|
-
|
|
113
|
-
criterion: str = "distance"
|
|
114
|
-
"""Specifies the criterion for clustering (grouping) cell (ROI) masks from different sessions. Currently, the only
|
|
115
|
-
valid option is 'distance'."""
|
|
116
|
-
|
|
117
|
-
threshold: float = 0.75
|
|
118
|
-
"""Specifies the threshold for the clustering algorithm. Cell masks will be clustered (grouped) together if their
|
|
119
|
-
clustering criterion is below this threshold value."""
|
|
120
|
-
|
|
121
|
-
mask_prevalence: int = 50
|
|
122
|
-
"""Specifies the minimum percentage of all registered sessions that must include the clustered cell mask. Cell masks
|
|
123
|
-
present in a smaller percentage of sessions than this value are excluded from processing. This parameter is used to filter
|
|
124
|
-
out cells that are mostly silent or not distinguishable across sessions."""
|
|
125
|
-
|
|
126
|
-
pixel_prevalence: int = 50
|
|
127
|
-
"""Specifies the minimum percentage of all registered sessions in which a cell mask pixel must be present for it to
|
|
128
|
-
be used to construct the template mask. Pixels present in a smaller percentage of sessions than this value are not used to
|
|
129
|
-
define the template masks. Template masks are used to extract the cell fluorescence from the original (non-deformed)
|
|
130
|
-
visual space of every session. This parameter is used to isolate the part of the cell that is stable across
|
|
131
|
-
sessions, which is required for the extraction step to work correctly (target only the tracked cell)."""
|
|
132
|
-
|
|
133
|
-
step_sizes: list[int] = field(default_factory=lambda: [200, 200])
|
|
134
|
-
"""Specifies the block size for the cell clustering (across-session tracking) process, in pixels, in the order of
|
|
135
|
-
(height, width). To reduce the memory (RAM) overhead, the algorithm divides the deformed (shared) visual space into
|
|
136
|
-
blocks and then processes one (or more) blocks at a time."""
|
|
137
|
-
|
|
138
|
-
bin_size: int = 50
|
|
139
|
-
"""Specifies the size of bins used to discover cell masks within blocks during clustering. To avoid edge cases, the
|
|
140
|
-
algorithm clusters the cell masks within the region defined by the center-point of each cell +- bin_size. This works
|
|
141
|
-
on top of pre-sorting cells into spatial blocks defined by 'step_sizes'."""
|
|
142
|
-
|
|
143
|
-
maximum_distance: int = 20
|
|
144
|
-
"""Specifies the maximum distance, in pixels, that can separate masks across multiple sessions. The clustering
|
|
145
|
-
algorithm will consider cell masks located at most within this distance from each-other across days as the same
|
|
146
|
-
cells during tracking."""
|
|
147
|
-
|
|
148
|
-
minimum_size: int = 25
|
|
149
|
-
"""The minimum size of the non-overlapping cell (ROI) region, in pixels, that has to be covered by the template
|
|
150
|
-
mask, for the cell to be assigned to that template. This is used to determine which template(s) the cell belongs to
|
|
151
|
-
(if any), for the purpose of tracking it across sessions."""
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
@dataclass()
|
|
155
|
-
class MultiDayS2PConfiguration(YamlConfig):
|
|
156
|
-
"""Aggregates all parameters for the multi-day suite2p pipeline used to track cells across multiple days
|
|
157
|
-
(sessions) and extract their activity.
|
|
158
|
-
|
|
159
|
-
These settings are used to configure the multi-day suite2p extraction pipeline, which is based on the reference
|
|
160
|
-
implementation here: https://github.com/sprustonlab/multiday-suite2p-public. This class behaves similarly to the
|
|
161
|
-
SingleDayS2PConfiguration class. It can be saved and loaded from a .YAML file and translated to dictionary or
|
|
162
|
-
ops.npy format, expected by the multi-day sl-suite2p pipeline.
|
|
163
|
-
"""
|
|
164
|
-
|
|
165
|
-
io: IO = field(default_factory=IO)
|
|
166
|
-
"""Stores parameters that control data input and output during various stages of the pipeline."""
|
|
167
|
-
hardware: Hardware = field(default_factory=Hardware)
|
|
168
|
-
"""Stores parameters that control how the suite2p interacts with the hardware of the host-computer to accelerate
|
|
169
|
-
processing speed."""
|
|
170
|
-
cell_detection: CellDetection = field(default_factory=CellDetection)
|
|
171
|
-
"""Stores parameters for selecting single-day-registered cells (ROIs) to be tracked across multiple sessions (days).
|
|
172
|
-
"""
|
|
173
|
-
registration: Registration = field(default_factory=Registration)
|
|
174
|
-
"""Stores parameters for aligning (registering) the sessions from multiple days to the same visual (sampling) space.
|
|
175
|
-
"""
|
|
176
|
-
clustering: Clustering = field(default_factory=Clustering)
|
|
177
|
-
"""Stores parameters for tracking (clustering) cell (ROI) masks across multiple registered sessions (days)."""
|
|
178
|
-
|
|
179
|
-
def to_npy(self, output_directory: Path) -> None:
|
|
180
|
-
"""Saves the managed configuration data as an 'ops.npy' file under the target directory.
|
|
181
|
-
|
|
182
|
-
This method is mostly called by internal sl-suite2p functions to translate the user-specified configuration
|
|
183
|
-
file into the format used by suite2p pipelines.
|
|
184
|
-
|
|
185
|
-
Notes:
|
|
186
|
-
If the target output directory does not exist when this method is called, it will be created.
|
|
187
|
-
|
|
188
|
-
Args:
|
|
189
|
-
output_directory: The path to the directory where the 'ops.npy' file should be saved.
|
|
190
|
-
"""
|
|
191
|
-
ensure_directory_exists(output_directory) # Creates the directory, if necessary
|
|
192
|
-
file_path = output_directory.joinpath("ops.npy") # Computes the output path
|
|
193
|
-
# Dumps the configuration data to 'ops.npy' file.
|
|
194
|
-
np.save(file_path, self.to_ops(), allow_pickle=True) # type: ignore
|
|
195
|
-
|
|
196
|
-
def to_config(self, output_directory: Path) -> None:
|
|
197
|
-
"""Saves the managed configuration data as a 'multi_day_s2p_configuration.yaml' file under the target
|
|
198
|
-
directory.
|
|
199
|
-
|
|
200
|
-
This method is typically used to dump the 'default' configuration parameters to disk as a user-editable
|
|
201
|
-
.yaml file. The user is then expected to modify these parameters as needed, before the class data is loaded and
|
|
202
|
-
used by the suite2p pipeline.
|
|
203
|
-
|
|
204
|
-
Notes:
|
|
205
|
-
If the target output directory does not exist when this method is called, it will be created.
|
|
206
|
-
|
|
207
|
-
Args:
|
|
208
|
-
output_directory: The path to the directory where the 'multi_day_s2p_configuration.yaml' file should be
|
|
209
|
-
saved.
|
|
210
|
-
"""
|
|
211
|
-
ensure_directory_exists(output_directory) # Creates the directory, if necessary
|
|
212
|
-
file_path = output_directory.joinpath("multi_day_s2p_configuration.yaml") # Computes the output path
|
|
213
|
-
|
|
214
|
-
# Note, this uses the same configuration name as the SessionData class, making it automatically compatible with
|
|
215
|
-
# Sun lab data structure.
|
|
216
|
-
self.to_yaml(file_path=file_path) # Dumps the data to a 'yaml' file.
|
|
217
|
-
|
|
218
|
-
def to_ops(self) -> dict[str, Any]:
|
|
219
|
-
"""Converts the class instance to a dictionary and returns it to the caller.
|
|
220
|
-
|
|
221
|
-
This method is mostly called by internal sl-suite2p functions to translate the default configuration parameters
|
|
222
|
-
to the dictionary format used by suite2p pipelines.
|
|
223
|
-
"""
|
|
224
|
-
return asdict(self)
|
|
@@ -1,104 +0,0 @@
|
|
|
1
|
-
from typing import Any
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
from dataclasses import field, dataclass
|
|
4
|
-
|
|
5
|
-
from _typeshed import Incomplete
|
|
6
|
-
from ataraxis_data_structures import YamlConfig
|
|
7
|
-
|
|
8
|
-
@dataclass()
|
|
9
|
-
class IO:
|
|
10
|
-
"""Stores parameters that control data input and output during various stages of the pipeline."""
|
|
11
|
-
|
|
12
|
-
session_ids: list[str] = field(default_factory=list)
|
|
13
|
-
session_folders: list[str] = field(default_factory=list)
|
|
14
|
-
|
|
15
|
-
@dataclass()
|
|
16
|
-
class Hardware:
|
|
17
|
-
"""Stores parameters that control how the suite2p interacts with the hardware of the host-computer to accelerate
|
|
18
|
-
processing speed."""
|
|
19
|
-
|
|
20
|
-
parallelize_registration: bool = ...
|
|
21
|
-
registration_workers: int = ...
|
|
22
|
-
parallelize_extraction: bool = ...
|
|
23
|
-
parallel_sessions: int = ...
|
|
24
|
-
|
|
25
|
-
@dataclass()
|
|
26
|
-
class CellDetection:
|
|
27
|
-
"""Stores parameters for selecting single-day-registered cells (ROIs) to be tracked across multiple sessions (days)."""
|
|
28
|
-
|
|
29
|
-
probability_threshold: float = ...
|
|
30
|
-
maximum_size: int = ...
|
|
31
|
-
mesoscope_stripe_borders: list[int] = field(default_factory=list)
|
|
32
|
-
stripe_margin: int = ...
|
|
33
|
-
|
|
34
|
-
@dataclass()
|
|
35
|
-
class Registration:
|
|
36
|
-
"""Stores parameters for aligning (registering) the sessions from multiple days to the same visual (sampling) space."""
|
|
37
|
-
|
|
38
|
-
image_type: str = ...
|
|
39
|
-
grid_sampling_factor: float = ...
|
|
40
|
-
scale_sampling: int = ...
|
|
41
|
-
speed_factor: float = ...
|
|
42
|
-
|
|
43
|
-
@dataclass()
|
|
44
|
-
class Clustering:
|
|
45
|
-
"""Stores parameters for tracking (clustering) cell (ROI) masks across multiple registered sessions (days)."""
|
|
46
|
-
|
|
47
|
-
criterion: str = ...
|
|
48
|
-
threshold: float = ...
|
|
49
|
-
mask_prevalence: int = ...
|
|
50
|
-
pixel_prevalence: int = ...
|
|
51
|
-
step_sizes: list[int] = field(default_factory=Incomplete)
|
|
52
|
-
bin_size: int = ...
|
|
53
|
-
maximum_distance: int = ...
|
|
54
|
-
minimum_size: int = ...
|
|
55
|
-
|
|
56
|
-
@dataclass()
|
|
57
|
-
class MultiDayS2PConfiguration(YamlConfig):
|
|
58
|
-
"""Aggregates all parameters for the multi-day suite2p pipeline used to track cells across multiple days
|
|
59
|
-
(sessions) and extract their activity.
|
|
60
|
-
|
|
61
|
-
These settings are used to configure the multi-day suite2p extraction pipeline, which is based on the reference
|
|
62
|
-
implementation here: https://github.com/sprustonlab/multiday-suite2p-public. This class behaves similarly to the
|
|
63
|
-
SingleDayS2PConfiguration class. It can be saved and loaded from a .YAML file and translated to dictionary or
|
|
64
|
-
ops.npy format, expected by the multi-day sl-suite2p pipeline.
|
|
65
|
-
"""
|
|
66
|
-
|
|
67
|
-
io: IO = field(default_factory=IO)
|
|
68
|
-
hardware: Hardware = field(default_factory=Hardware)
|
|
69
|
-
cell_detection: CellDetection = field(default_factory=CellDetection)
|
|
70
|
-
registration: Registration = field(default_factory=Registration)
|
|
71
|
-
clustering: Clustering = field(default_factory=Clustering)
|
|
72
|
-
def to_npy(self, output_directory: Path) -> None:
|
|
73
|
-
"""Saves the managed configuration data as an 'ops.npy' file under the target directory.
|
|
74
|
-
|
|
75
|
-
This method is mostly called by internal sl-suite2p functions to translate the user-specified configuration
|
|
76
|
-
file into the format used by suite2p pipelines.
|
|
77
|
-
|
|
78
|
-
Notes:
|
|
79
|
-
If the target output directory does not exist when this method is called, it will be created.
|
|
80
|
-
|
|
81
|
-
Args:
|
|
82
|
-
output_directory: The path to the directory where the 'ops.npy' file should be saved.
|
|
83
|
-
"""
|
|
84
|
-
def to_config(self, output_directory: Path) -> None:
|
|
85
|
-
"""Saves the managed configuration data as a 'multi_day_s2p_configuration.yaml' file under the target
|
|
86
|
-
directory.
|
|
87
|
-
|
|
88
|
-
This method is typically used to dump the 'default' configuration parameters to disk as a user-editable
|
|
89
|
-
.yaml file. The user is then expected to modify these parameters as needed, before the class data is loaded and
|
|
90
|
-
used by the suite2p pipeline.
|
|
91
|
-
|
|
92
|
-
Notes:
|
|
93
|
-
If the target output directory does not exist when this method is called, it will be created.
|
|
94
|
-
|
|
95
|
-
Args:
|
|
96
|
-
output_directory: The path to the directory where the 'multi_day_s2p_configuration.yaml' file should be
|
|
97
|
-
saved.
|
|
98
|
-
"""
|
|
99
|
-
def to_ops(self) -> dict[str, Any]:
|
|
100
|
-
"""Converts the class instance to a dictionary and returns it to the caller.
|
|
101
|
-
|
|
102
|
-
This method is mostly called by internal sl-suite2p functions to translate the default configuration parameters
|
|
103
|
-
to the dictionary format used by suite2p pipelines.
|
|
104
|
-
"""
|