sl-shared-assets 1.0.0rc13__py3-none-any.whl → 1.0.0rc15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of sl-shared-assets might be problematic.

Files changed (42)
  1. sl_shared_assets/__init__.py +27 -9
  2. sl_shared_assets/__init__.pyi +71 -0
  3. sl_shared_assets/cli.py +13 -14
  4. sl_shared_assets/cli.pyi +28 -0
  5. sl_shared_assets/data_classes/__init__.py +63 -0
  6. sl_shared_assets/data_classes/__init__.pyi +61 -0
  7. sl_shared_assets/data_classes/configuration_data.py +64 -0
  8. sl_shared_assets/data_classes/configuration_data.pyi +37 -0
  9. sl_shared_assets/data_classes/runtime_data.py +233 -0
  10. sl_shared_assets/data_classes/runtime_data.pyi +145 -0
  11. sl_shared_assets/data_classes/session_data.py +1275 -0
  12. sl_shared_assets/data_classes/session_data.pyi +527 -0
  13. sl_shared_assets/data_classes/surgery_data.py +152 -0
  14. sl_shared_assets/data_classes/surgery_data.pyi +89 -0
  15. sl_shared_assets/server/__init__.py +8 -0
  16. sl_shared_assets/server/__init__.pyi +8 -0
  17. sl_shared_assets/server/job.py +140 -0
  18. sl_shared_assets/server/job.pyi +94 -0
  19. sl_shared_assets/server/server.py +213 -0
  20. sl_shared_assets/server/server.pyi +95 -0
  21. sl_shared_assets/suite2p/__init__.py +8 -0
  22. sl_shared_assets/suite2p/__init__.pyi +4 -0
  23. sl_shared_assets/suite2p/multi_day.py +193 -0
  24. sl_shared_assets/suite2p/multi_day.pyi +99 -0
  25. sl_shared_assets/{suite2p.py → suite2p/single_day.py} +55 -32
  26. sl_shared_assets/suite2p/single_day.pyi +192 -0
  27. sl_shared_assets/tools/__init__.py +8 -0
  28. sl_shared_assets/tools/__init__.pyi +5 -0
  29. sl_shared_assets/{ascension_tools.py → tools/ascension_tools.py} +3 -6
  30. sl_shared_assets/tools/ascension_tools.pyi +68 -0
  31. sl_shared_assets/tools/packaging_tools.pyi +52 -0
  32. sl_shared_assets/tools/transfer_tools.pyi +53 -0
  33. {sl_shared_assets-1.0.0rc13.dist-info → sl_shared_assets-1.0.0rc15.dist-info}/METADATA +1 -1
  34. sl_shared_assets-1.0.0rc15.dist-info/RECORD +40 -0
  35. sl_shared_assets/data_classes.py +0 -1656
  36. sl_shared_assets/server.py +0 -293
  37. sl_shared_assets-1.0.0rc13.dist-info/RECORD +0 -14
  38. /sl_shared_assets/{packaging_tools.py → tools/packaging_tools.py} +0 -0
  39. /sl_shared_assets/{transfer_tools.py → tools/transfer_tools.py} +0 -0
  40. {sl_shared_assets-1.0.0rc13.dist-info → sl_shared_assets-1.0.0rc15.dist-info}/WHEEL +0 -0
  41. {sl_shared_assets-1.0.0rc13.dist-info → sl_shared_assets-1.0.0rc15.dist-info}/entry_points.txt +0 -0
  42. {sl_shared_assets-1.0.0rc13.dist-info → sl_shared_assets-1.0.0rc15.dist-info}/licenses/LICENSE +0 -0
sl_shared_assets/server/server.py
@@ -0,0 +1,213 @@
+ """This module provides the tools for working with the Sun lab BioHPC cluster. Specifically, the classes from this
+ module establish an API for submitting jobs to the shared data processing cluster (managed via SLURM) and monitoring
+ the running job status. All lab processing and analysis pipelines use this interface for accessing shared compute
+ resources.
+ """
+
+ import time
+ from pathlib import Path
+ import tempfile
+ from dataclasses import dataclass
+
+ import paramiko
+
+ # noinspection PyProtectedMember
+ from simple_slurm import Slurm  # type: ignore
+ from paramiko.client import SSHClient
+ from ataraxis_base_utilities import LogLevel, console
+ from ataraxis_data_structures import YamlConfig
+
+ from .job import Job
+
+
+ def generate_server_credentials(
+     output_directory: Path, username: str, password: str, host: str = "cbsuwsun.biohpc.cornell.edu"
+ ) -> None:
+     """Generates a new server_credentials.yaml file under the specified directory, using the input information.
+
+     This function provides a convenience interface for generating new BioHPC server credential files. Generally, it is
+     only used when setting up new host-computers in the lab.
+     """
+     ServerCredentials(username=username, password=password, host=host).to_yaml(
+         file_path=output_directory.joinpath("server_credentials.yaml")
+     )
+
+
+ @dataclass()
+ class ServerCredentials(YamlConfig):
+     """This class stores the hostname and credentials used to log into the BioHPC cluster to run Sun lab processing
+     pipelines.
+
+     Primarily, this is used as part of the sl-experiment library runtime to start data processing once it is
+     transferred to the BioHPC server during preprocessing. However, the same file can be used together with the Server
+     class API to run any computation jobs on the lab's BioHPC server.
+     """
+
+     username: str = "YourNetID"
+     """The username to use for server authentication."""
+     password: str = "YourPassword"
+     """The password to use for server authentication."""
+     host: str = "cbsuwsun.biohpc.cornell.edu"
+     """The hostname or IP address of the server to connect to."""
+
+
+ class Server:
+     """Encapsulates access to the Sun lab BioHPC processing server.
+
+     This class provides the API that allows accessing the BioHPC server to create and submit various SLURM-managed jobs
+     to the server. It functions as the central interface used by all processing pipelines in the lab to execute costly
+     data processing on the server.
+
+     Notes:
+         All lab processing pipelines expect the data to be stored on the server and all processing logic to be packaged
+         and installed into dedicated conda environments on the server.
+
+         This class assumes that the target server has the SLURM job manager installed and accessible to the user whose
+         credentials are used to connect to the server as part of this class's instantiation.
+
+     Args:
+         credentials_path: The path to the locally stored .yaml file that contains the server hostname and access
+             credentials.
+
+     Attributes:
+         _open: Tracks whether the connection to the server is open or not.
+         _client: Stores the initialized SSHClient instance used to interface with the server.
+     """
+
+     def __init__(self, credentials_path: Path) -> None:
+         # Tracker used to prevent __del__ from calling close() for a partially initialized class.
+         self._open: bool = False
+
+         # Loads the credentials from the provided .yaml file
+         self._credentials: ServerCredentials = ServerCredentials.from_yaml(credentials_path)  # type: ignore
+
+         # Establishes the SSH connection to the specified processing server. At most, attempts to connect to the server
+         # 30 times before terminating with an error
+         attempt = 0
+         while True:
+             console.echo(
+                 f"Trying to connect to {self._credentials.host} (attempt {attempt}/30)...", level=LogLevel.INFO
+             )
+             try:
+                 self._client: SSHClient = paramiko.SSHClient()
+                 self._client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+                 self._client.connect(
+                     self._credentials.host, username=self._credentials.username, password=self._credentials.password
+                 )
+                 console.echo(f"Connected to {self._credentials.host}", level=LogLevel.SUCCESS)
+                 break
+             except paramiko.AuthenticationException:
+                 message = (
+                     f"Authentication failed when connecting to {self._credentials.host} using the "
+                     f"{self._credentials.username} user."
+                 )
+                 console.error(message, RuntimeError)
+                 raise RuntimeError
+             except:
+                 if attempt == 30:
+                     message = f"Could not connect to {self._credentials.host} after 30 attempts. Aborting runtime."
+                     console.error(message, RuntimeError)
+                     raise RuntimeError
+
+                 console.echo(
+                     f"Could not SSH to {self._credentials.host}, retrying after a 2-second delay...",
+                     level=LogLevel.WARNING,
+                 )
+                 attempt += 1
+                 time.sleep(2)
+
+     def __del__(self) -> None:
+         """If the instance is connected to the server, terminates the connection before the instance is destroyed."""
+         self.close()
+
+     def submit_job(self, job: Job) -> Job:
+         """Submits the input job to the managed BioHPC server via the SLURM job manager.
+
+         This method submits various jobs for execution via the SLURM-managed BioHPC cluster. As part of its runtime, the
+         method translates the Job object into a shell script, moves the script to the target working directory on
+         the server, and instructs the server to execute the shell script (via SLURM).
+
+         Args:
+             job: The Job object that contains all job data.
+
+         Returns:
+             The Job object whose 'job_id' attribute has been updated with the job ID, if the job was successfully
+             submitted.
+
+         Raises:
+             RuntimeError: If job submission to the server fails.
+         """
+
+         # Generates a temporary shell script on the local machine. Uses tempfile to automatically remove the
+         # local script as soon as it is uploaded to the server.
+         with tempfile.TemporaryDirectory() as temp_dir:
+             local_script_path = Path(temp_dir).joinpath(f"{job.job_name}.sh")
+             fixed_script_content = job.command_script
+
+             # Creates a temporary script file locally and dumps the translated command data into the file
+             with open(local_script_path, "w") as f:
+                 f.write(fixed_script_content)
+
+             # Uploads the command script to the server
+             sftp = self._client.open_sftp()
+             sftp.put(localpath=local_script_path, remotepath=job.remote_script_path)
+             sftp.close()
+
+         # Makes the server-side script executable
+         self._client.exec_command(f"chmod +x {job.remote_script_path}")
+
+         # Submits the job to SLURM with sbatch and verifies the submission state
+         job_output = self._client.exec_command(f"sbatch {job.remote_script_path}")[1].read().strip().decode()
+
+         # If 'Submitted batch job' is not in the output received from SLURM in response to issuing the submission
+         # command, raises an error.
+         if "Submitted batch job" not in job_output:
+             message = f"Failed to submit the {job.job_name} job to the BioHPC cluster."
+             console.error(message, RuntimeError)
+
+             # Fallback to appease mypy, should not be reachable
+             raise RuntimeError(message)
+
+         # Otherwise, extracts the job id assigned to the job by SLURM from the response and writes it to the processed
+         # Job object
+         job_id = job_output.split()[-1]
+         job.job_id = job_id
+         return job
+
+     def job_complete(self, job: Job) -> bool:
+         """Returns True if the job managed by the input Job instance has completed or terminated its runtime due
+         to an error.
+
+         If the job is still running or is waiting inside the execution queue, returns False.
+
+         Args:
+             job: The Job object whose status needs to be checked.
+
+         Raises:
+             ValueError: If the input Job object does not contain a valid job_id, suggesting that it has not been
+                 submitted to the server.
+         """
+
+         if job.job_id is None:
+             message = (
+                 f"The input Job object for the job {job.job_name} does not contain a valid job_id. This indicates that "
+                 f"the job has not been submitted to the server."
+             )
+             console.error(message, ValueError)
+
+             # This is here to appease mypy, it should not be reachable
+             raise ValueError(message)
+
+         if job.job_id not in self._client.exec_command(f"squeue -j {job.job_id}")[1].read().decode().strip():
+             return True
+         else:
+             return False
+
+     def close(self) -> None:
+         """Closes the SSH connection to the server.
+
+         This method has to be called before destroying the class instance to ensure proper resource cleanup.
+         """
+         # Prevents closing already closed connections
+         if self._open:
+             self._client.close()
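
For orientation, below is a minimal usage sketch of the new server API. It relies only on names visible in this diff (Server, generate_server_credentials, submit_job, job_complete, close, and Job from sl_shared_assets/server/job.py); the Job constructor and any package-level re-exports are not shown in this diff, so a pre-built Job instance is assumed.

import time
from pathlib import Path

# Module paths follow the new file layout introduced in this release; shorter package-level
# imports (e.g. from sl_shared_assets.server import Server) may also work but are not shown here.
from sl_shared_assets.server.server import Server, generate_server_credentials
from sl_shared_assets.server.job import Job  # Job's constructor arguments are not shown in this diff


def run_job_and_wait(credentials_path: Path, job: Job, poll_interval: float = 30.0) -> Job:
    """Submits a pre-built Job to the BioHPC cluster and blocks until SLURM no longer lists it in the queue."""
    server = Server(credentials_path=credentials_path)  # opens the SSH connection using the stored credentials
    try:
        submitted = server.submit_job(job)  # uploads the shell script, runs sbatch, and fills in job.job_id
        while not server.job_complete(submitted):  # True once the job_id disappears from the squeue output
            time.sleep(poll_interval)
        return submitted
    finally:
        server.close()  # explicitly releases the SSH connection


# One-time setup per host-computer: writes the credentials file consumed by Server above.
# generate_server_credentials(output_directory=Path("/local/config"), username="YourNetID", password="YourPassword")
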
sl_shared_assets/server/server.pyi
@@ -0,0 +1,95 @@
+ from pathlib import Path
+ from dataclasses import dataclass
+
+ from simple_slurm import Slurm as Slurm
+ from paramiko.client import SSHClient as SSHClient
+ from ataraxis_data_structures import YamlConfig
+
+ from .job import Job as Job
+
+ def generate_server_credentials(
+     output_directory: Path, username: str, password: str, host: str = "cbsuwsun.biohpc.cornell.edu"
+ ) -> None:
+     """Generates a new server_credentials.yaml file under the specified directory, using the input information.
+
+     This function provides a convenience interface for generating new BioHPC server credential files. Generally, it is
+     only used when setting up new host-computers in the lab.
+     """
+ @dataclass()
+ class ServerCredentials(YamlConfig):
+     """This class stores the hostname and credentials used to log into the BioHPC cluster to run Sun lab processing
+     pipelines.
+
+     Primarily, this is used as part of the sl-experiment library runtime to start data processing once it is
+     transferred to the BioHPC server during preprocessing. However, the same file can be used together with the Server
+     class API to run any computation jobs on the lab's BioHPC server.
+     """
+
+     username: str = ...
+     password: str = ...
+     host: str = ...
+
+ class Server:
+     """Encapsulates access to the Sun lab BioHPC processing server.
+
+     This class provides the API that allows accessing the BioHPC server to create and submit various SLURM-managed jobs
+     to the server. It functions as the central interface used by all processing pipelines in the lab to execute costly
+     data processing on the server.
+
+     Notes:
+         All lab processing pipelines expect the data to be stored on the server and all processing logic to be packaged
+         and installed into dedicated conda environments on the server.
+
+         This class assumes that the target server has the SLURM job manager installed and accessible to the user whose
+         credentials are used to connect to the server as part of this class's instantiation.
+
+     Args:
+         credentials_path: The path to the locally stored .yaml file that contains the server hostname and access
+             credentials.
+
+     Attributes:
+         _open: Tracks whether the connection to the server is open or not.
+         _client: Stores the initialized SSHClient instance used to interface with the server.
+     """
+
+     _open: bool
+     _credentials: ServerCredentials
+     _client: SSHClient
+     def __init__(self, credentials_path: Path) -> None: ...
+     def __del__(self) -> None:
+         """If the instance is connected to the server, terminates the connection before the instance is destroyed."""
+     def submit_job(self, job: Job) -> Job:
+         """Submits the input job to the managed BioHPC server via the SLURM job manager.
+
+         This method submits various jobs for execution via the SLURM-managed BioHPC cluster. As part of its runtime, the
+         method translates the Job object into a shell script, moves the script to the target working directory on
+         the server, and instructs the server to execute the shell script (via SLURM).
+
+         Args:
+             job: The Job object that contains all job data.
+
+         Returns:
+             The Job object whose 'job_id' attribute has been updated with the job ID, if the job was successfully
+             submitted.
+
+         Raises:
+             RuntimeError: If job submission to the server fails.
+         """
+     def job_complete(self, job: Job) -> bool:
+         """Returns True if the job managed by the input Job instance has completed or terminated its runtime due
+         to an error.
+
+         If the job is still running or is waiting inside the execution queue, returns False.
+
+         Args:
+             job: The Job object whose status needs to be checked.
+
+         Raises:
+             ValueError: If the input Job object does not contain a valid job_id, suggesting that it has not been
+                 submitted to the server.
+         """
+     def close(self) -> None:
+         """Closes the SSH connection to the server.
+
+         This method has to be called before destroying the class instance to ensure proper resource cleanup.
+         """
sl_shared_assets/suite2p/__init__.py
@@ -0,0 +1,8 @@
+ """This package provides the configuration classes used by the Sun lab maintained version of the suite2p library
+ (sl-suite2p package, https://github.com/Sun-Lab-NBB/suite2p) to process brain activity data within and across sessions
+ (days)."""
+
+ from .multi_day import MultiDayS2PConfiguration
+ from .single_day import SingleDayS2PConfiguration
+
+ __all__ = ["MultiDayS2PConfiguration", "SingleDayS2PConfiguration"]
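
With this split of suite2p.py into a subpackage, downstream code would import the configuration classes as sketched below (based on the re-exports above; whether the top-level sl_shared_assets namespace also re-exports them depends on the __init__.py changes not shown in this diff).

# New import path created by the suite2p.py -> suite2p/ package split in this release.
from sl_shared_assets.suite2p import MultiDayS2PConfiguration, SingleDayS2PConfiguration
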
sl_shared_assets/suite2p/__init__.pyi
@@ -0,0 +1,4 @@
+ from .multi_day import MultiDayS2PConfiguration as MultiDayS2PConfiguration
+ from .single_day import SingleDayS2PConfiguration as SingleDayS2PConfiguration
+
+ __all__ = ["MultiDayS2PConfiguration", "SingleDayS2PConfiguration"]
sl_shared_assets/suite2p/multi_day.py
@@ -0,0 +1,193 @@
+ """This module stores the classes used to configure the multi-day (across-session) sl-suite2p pipeline. This pipeline
+ extends the original suite2p code to support tracking the same objects (cells) across multiple days. Both single-day
+ (original) and multi-day (extended) pipelines are available as part of the Sun lab maintained sl-suite2p package."""
+
+ from typing import Any
+ from dataclasses import field, asdict, dataclass
+
+ from ataraxis_data_structures import YamlConfig
+
+
+ @dataclass()
+ class IO:
+     """Stores parameters that control data input and output during various stages of the pipeline."""
+
+     sessions: list[str] = field(default_factory=list)
+     """Specifies the list of sessions to register across days, as absolute paths to their /suite2p directories,
+     e.g.: root/project/animal/session/processed_data/suite2p. The suite2p directory is created as part of the
+     'single-day' suite2p runtime, assuming the default value of the 'save_folder' SingleDayS2PConfiguration class
+     attribute was not modified. Note, each suite2p directory has to contain the 'combined' plane folder, which is
+     created if the 'combined' SingleDayS2PConfiguration class attribute is 'True'."""
+
+     mesoscan: bool = True
+     """Indicates whether the processed session /suite2p folders contain registered Mesoscope frames."""
+
+
+ @dataclass()
+ class CellDetection:
+     """Stores parameters for selecting single-day-registered cells (ROIs) to be tracked across multiple sessions (days).
+
+     To maximize the tracking pipeline reliability, it is beneficial to pre-filter the cells whose identity (as cells)
+     is not certain or that may be hard to track across sessions.
+     """
+
+     probability_threshold: float = 0.85
+     """The minimum required probability score assigned to the cell (ROI) by the suite2p classifier. Cells with a lower
+     classifier score are excluded from processing."""
+
+     maximum_size: int = 1000
+     """The maximum allowed cell (ROI) size, in pixels. Cells with a larger pixel size are excluded from processing."""
+
+     mesoscope_stripe_borders: list[int] = field(default_factory=list)
+     """Stores the x-coordinates of mesoscope combined image stripe (ROI) borders. For mesoscope images, 'stripes' are
+     the individual imaging ROIs acquired in the 'multiple-ROI' mode. If this field is not overwritten by the user, the
+     pipeline will read the border data from the combined plane 'ops.npy' file generated by the single-day suite2p pipeline.
+     """
+
+     stripe_margin: int = 30
+     """The minimum required distance, in pixels, between the center-point (the median x-coordinate) of the cell (ROI)
+     and the mesoscope stripe border. Cells that are too close to stripe borders are excluded from processing to avoid
+     ambiguities associated with tracking cells that span multiple stripes."""
+
+
+ @dataclass()
+ class Registration:
+     """Stores parameters for aligning (registering) the sessions from multiple days to the same visual space.
+
+     Registration is used to create a 'shared' visual space, allowing the same cells (ROIs) to be tracked across the
+     otherwise variable visual space of each session.
+     """
+
+     image_type: str = "enhanced"
+     """The type of single-day suite2p-generated image to use for across-day registration. Supported options are
+     'enhanced', 'mean' and 'max'. This 'template' image is used to calculate the necessary deformation (transformations)
+     to register (align) all sessions to the same visual space."""
+
+     grid_sampling_factor: float = 1
+     """Determines to what extent the grid sampling scales with the deformed image scale. Has to be between 0 and 1. By
+     making this value lower than 1, the grid is relatively fine at the higher scales, allowing for more
+     deformations. This is used when resizing session images as part of the registration process."""
+
+     scale_sampling: int = 30
+     """The number of iterations for each level (i.e. between each factor two in scale) to perform when computing the
+     deformations. Values between 20 and 30 are reasonable in most situations, but higher values yield better results in
+     general. The speed of the algorithm scales linearly with this value."""
+
+     speed_factor: float = 3
+     """The relative force of the deformation transform applied when registering the sessions to the same visual space.
+     This is the most important parameter to tune."""
+
+
+ @dataclass()
+ class Clustering:
+     """Stores parameters for clustering cell (ROI) masks across multiple registered sessions.
+
+     Clustering is used to track cells across sessions. If a group of ROIs across sessions is clustered together, it
+     is likely that they represent the same cell (ROI) across all sessions. This process involves first creating a
+     'template' mask that tracks a cell using the registered (deformed) visual space and then using this template to
+     track the cell in the original (non-deformed) visual space of each session.
+     """
+
+     criterion: str = "distance"
+     """Specifies the criterion for clustering (grouping) cell (ROI) masks from different sessions. Currently, the only
+     valid option is 'distance'."""
+
+     threshold: float = 0.75
+     """Specifies the threshold for the clustering algorithm. Cell masks will be clustered (grouped) together if their
+     clustering criterion is below this threshold value."""
+
+     mask_prevalence: int = 50
+     """Specifies the minimum percentage of all registered sessions that must include the clustered cell mask. Cell masks
+     present in a smaller percentage of sessions than this value are excluded from processing. This parameter is used to
+     isolate the cells that are present (active) across sessions."""
+
+     pixel_prevalence: int = 50
+     """Specifies the minimum percentage of all registered sessions in which a pixel from a given cell mask must be
+     present for it to be used to construct the template mask. Pixels present in a smaller percentage of sessions than
+     this value are not used to define the 'template' mask coordinates. Template masks are used to extract the cell
+     fluorescence from the 'original' visual space of every session. This parameter is used to isolate the part of the
+     cell that is stable across sessions."""
+
+     step_sizes: list[int] = field(default_factory=lambda: [200, 200])
+     """Specifies the block size for the clustering process, in pixels. Clustering is applied in blocks of this size,
+     sampled across the processed plane image, to reduce the memory (RAM) overhead."""
+
+     bin_size: int = 50
+     """Specifies the size of bins used to discover cell masks within blocks during clustering. To avoid edge cases, the
+     algorithm clusters the cell masks within the region defined by the center-point of each cell +- bin_size."""
+
+     maximum_distance: int = 20
+     """Specifies the maximum distance, in pixels, that can separate masks across multiple sessions. The clustering
+     algorithm will consider cell masks located at most this distance from each other across days as the same cells
+     during tracking."""
+
+     minimum_size: int = 25
+     """The minimum size of the non-overlapping (with other cells) cell (ROI) region, in pixels, that has to be covered
+     by the template mask, for the cell to be assigned to that template. This is used to determine which template(s) the
+     cell belongs to (if any), for the purpose of tracking it across sessions."""
+
+
+ @dataclass()
+ class Demix:
+     """Stores settings used to deconvolve fluorescence signals from cells tracked across multiple days.
+
+     This step applies the suite2p spike deconvolution algorithm to the cell masks isolated during clustering to extract
+     the fluorescence of the cells tracked across multiple sessions (days). Generally, it should use the same parameters
+     as were used by the single-day suite2p pipeline.
+     """
+
+     baseline: str = "maximin"
+     """Specifies the method to compute the baseline of each trace. This baseline is then subtracted from each cell.
+     ‘maximin’ computes a moving baseline by filtering the data with a Gaussian of width 'sig_baseline' * 'fs', and then
+     minimum filtering with a window of 'win_baseline' * 'fs', and then maximum filtering with the same window.
+     ‘constant’ computes a constant baseline by filtering with a Gaussian of width 'sig_baseline' * 'fs' and then taking
+     the minimum value of this filtered trace. ‘constant_percentile’ computes a constant baseline by taking the
+     'prctile_baseline' percentile of the trace."""
+
+     win_baseline: float = 60.0
+     """The time window, in seconds, over which to compute the baseline filter."""
+
+     sig_baseline: float = 10.0
+     """The standard deviation, in seconds, of the Gaussian filter applied to smooth the baseline signal."""
+
+     l2_reg: float = 0.1
+     """The L2 regularization strength applied during spike deconvolution."""
+
+     neucoeff: float = 0.7
+     """The neuropil coefficient applied for signal correction before deconvolution."""
+
+
+ @dataclass()
+ class MultiDayS2PConfiguration(YamlConfig):
+     """Aggregates all parameters for the multi-day suite2p pipeline used to track cells across multiple days
+     (sessions) and extract their activity.
+
+     These settings are used to configure the multiday suite2p extraction pipeline, which is based on the reference
+     implementation here: https://github.com/sprustonlab/multiday-suite2p-public. This class behaves similarly to the
+     SingleDayS2PConfiguration class. It can be saved to and loaded from a .YAML file and translated to the dictionary
+     format expected by the multi-day sl-suite2p pipeline.
+     """
+
+     cell_detection: CellDetection = field(default_factory=CellDetection)
+     """Stores parameters for selecting single-day-registered cells (ROIs) to be tracked across multiple sessions
+     (days)."""
+     registration: Registration = field(default_factory=Registration)
+     """Stores parameters for aligning (registering) the sessions from multiple days to the same visual space."""
+     clustering: Clustering = field(default_factory=Clustering)
+     """Stores parameters for clustering (tracking) cell (ROI) masks across multiple registered sessions."""
+     demix: Demix = field(default_factory=Demix)
+     """Stores settings used to deconvolve fluorescence signals from cells tracked across multiple days."""
+     io: IO = field(default_factory=IO)
+     """Stores parameters that control data input and output during various stages of the pipeline."""
+
+     def to_ops(self) -> dict[str, Any]:
+         """Converts the class instance to a dictionary and returns it to the caller.
+
+         This dictionary can be passed to sl-suite2p multi-day functions as the 'ops' argument.
+
+         Notes:
+             Unlike the single-day configuration class, the dictionary generated by this method uses section names as
+             top-level keys and parameter names as second-level keys. This mimics the original multiday-pipeline
+             configuration scheme.
+         """
+         return asdict(self)
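
A minimal sketch of the intended configuration round-trip, combining the to_ops() method above with the to_yaml()/from_yaml() interface that YamlConfig subclasses use elsewhere in this diff (see ServerCredentials); the session paths and overridden values are illustrative only.

from pathlib import Path

from sl_shared_assets.suite2p.multi_day import IO, CellDetection, MultiDayS2PConfiguration

# Override only the fields that differ from the defaults; the other sections keep their default values.
config = MultiDayS2PConfiguration(
    io=IO(
        sessions=[
            "/server/project/animal/session_1/processed_data/suite2p",  # illustrative paths
            "/server/project/animal/session_2/processed_data/suite2p",
        ]
    ),
    cell_detection=CellDetection(probability_threshold=0.90),
)

# Persist the configuration as a .yaml file (YamlConfig API, used the same way by ServerCredentials above)...
config.to_yaml(file_path=Path("multi_day_s2p_configuration.yaml"))

# ...and convert it to the nested 'ops' dictionary expected by the multi-day sl-suite2p pipeline:
# section names are top-level keys, parameter names are second-level keys.
ops = config.to_ops()
assert ops["clustering"]["threshold"] == 0.75
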
sl_shared_assets/suite2p/multi_day.pyi
@@ -0,0 +1,99 @@
+ from typing import Any
+ from dataclasses import field, dataclass
+
+ from _typeshed import Incomplete
+ from ataraxis_data_structures import YamlConfig
+
+ @dataclass()
+ class IO:
+     """Stores parameters that control data input and output during various stages of the pipeline."""
+
+     sessions: list[str] = field(default_factory=list)
+     mesoscan: bool = ...
+
+ @dataclass()
+ class CellDetection:
+     """Stores parameters for selecting single-day-registered cells (ROIs) to be tracked across multiple sessions (days).
+
+     To maximize the tracking pipeline reliability, it is beneficial to pre-filter the cells whose identity (as cells)
+     is not certain or that may be hard to track across sessions.
+     """
+
+     probability_threshold: float = ...
+     maximum_size: int = ...
+     mesoscope_stripe_borders: list[int] = field(default_factory=list)
+     stripe_margin: int = ...
+
+ @dataclass()
+ class Registration:
+     """Stores parameters for aligning (registering) the sessions from multiple days to the same visual space.
+
+     Registration is used to create a 'shared' visual space, allowing the same cells (ROIs) to be tracked across the
+     otherwise variable visual space of each session.
+     """
+
+     image_type: str = ...
+     grid_sampling_factor: float = ...
+     scale_sampling: int = ...
+     speed_factor: float = ...
+
+ @dataclass()
+ class Clustering:
+     """Stores parameters for clustering cell (ROI) masks across multiple registered sessions.
+
+     Clustering is used to track cells across sessions. If a group of ROIs across sessions is clustered together, it
+     is likely that they represent the same cell (ROI) across all sessions. This process involves first creating a
+     'template' mask that tracks a cell using the registered (deformed) visual space and then using this template to
+     track the cell in the original (non-deformed) visual space of each session.
+     """
+
+     criterion: str = ...
+     threshold: float = ...
+     mask_prevalence: int = ...
+     pixel_prevalence: int = ...
+     step_sizes: list[int] = field(default_factory=Incomplete)
+     bin_size: int = ...
+     maximum_distance: int = ...
+     minimum_size: int = ...
+
+ @dataclass()
+ class Demix:
+     """Stores settings used to deconvolve fluorescence signals from cells tracked across multiple days.
+
+     This step applies the suite2p spike deconvolution algorithm to the cell masks isolated during clustering to extract
+     the fluorescence of the cells tracked across multiple sessions (days). Generally, it should use the same parameters
+     as were used by the single-day suite2p pipeline.
+     """
+
+     baseline: str = ...
+     win_baseline: float = ...
+     sig_baseline: float = ...
+     l2_reg: float = ...
+     neucoeff: float = ...
+
+ @dataclass()
+ class MultiDayS2PConfiguration(YamlConfig):
+     """Aggregates all parameters for the multi-day suite2p pipeline used to track cells across multiple days
+     (sessions) and extract their activity.
+
+     These settings are used to configure the multiday suite2p extraction pipeline, which is based on the reference
+     implementation here: https://github.com/sprustonlab/multiday-suite2p-public. This class behaves similarly to the
+     SingleDayS2PConfiguration class. It can be saved to and loaded from a .YAML file and translated to the dictionary
+     format expected by the multi-day sl-suite2p pipeline.
+     """
+
+     cell_detection: CellDetection = field(default_factory=CellDetection)
+     registration: Registration = field(default_factory=Registration)
+     clustering: Clustering = field(default_factory=Clustering)
+     demix: Demix = field(default_factory=Demix)
+     io: IO = field(default_factory=IO)
+     def to_ops(self) -> dict[str, Any]:
+         """Converts the class instance to a dictionary and returns it to the caller.
+
+         This dictionary can be passed to sl-suite2p multi-day functions as the 'ops' argument.
+
+         Notes:
+             Unlike the single-day configuration class, the dictionary generated by this method uses section names as
+             top-level keys and parameter names as second-level keys. This mimics the original multiday-pipeline
+             configuration scheme.
+         """