sl-shared-assets 1.0.0rc13__py3-none-any.whl → 1.0.0rc15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- sl_shared_assets/__init__.py +27 -9
- sl_shared_assets/__init__.pyi +71 -0
- sl_shared_assets/cli.py +13 -14
- sl_shared_assets/cli.pyi +28 -0
- sl_shared_assets/data_classes/__init__.py +63 -0
- sl_shared_assets/data_classes/__init__.pyi +61 -0
- sl_shared_assets/data_classes/configuration_data.py +64 -0
- sl_shared_assets/data_classes/configuration_data.pyi +37 -0
- sl_shared_assets/data_classes/runtime_data.py +233 -0
- sl_shared_assets/data_classes/runtime_data.pyi +145 -0
- sl_shared_assets/data_classes/session_data.py +1275 -0
- sl_shared_assets/data_classes/session_data.pyi +527 -0
- sl_shared_assets/data_classes/surgery_data.py +152 -0
- sl_shared_assets/data_classes/surgery_data.pyi +89 -0
- sl_shared_assets/server/__init__.py +8 -0
- sl_shared_assets/server/__init__.pyi +8 -0
- sl_shared_assets/server/job.py +140 -0
- sl_shared_assets/server/job.pyi +94 -0
- sl_shared_assets/server/server.py +213 -0
- sl_shared_assets/server/server.pyi +95 -0
- sl_shared_assets/suite2p/__init__.py +8 -0
- sl_shared_assets/suite2p/__init__.pyi +4 -0
- sl_shared_assets/suite2p/multi_day.py +193 -0
- sl_shared_assets/suite2p/multi_day.pyi +99 -0
- sl_shared_assets/{suite2p.py → suite2p/single_day.py} +55 -32
- sl_shared_assets/suite2p/single_day.pyi +192 -0
- sl_shared_assets/tools/__init__.py +8 -0
- sl_shared_assets/tools/__init__.pyi +5 -0
- sl_shared_assets/{ascension_tools.py → tools/ascension_tools.py} +3 -6
- sl_shared_assets/tools/ascension_tools.pyi +68 -0
- sl_shared_assets/tools/packaging_tools.pyi +52 -0
- sl_shared_assets/tools/transfer_tools.pyi +53 -0
- {sl_shared_assets-1.0.0rc13.dist-info → sl_shared_assets-1.0.0rc15.dist-info}/METADATA +1 -1
- sl_shared_assets-1.0.0rc15.dist-info/RECORD +40 -0
- sl_shared_assets/data_classes.py +0 -1656
- sl_shared_assets/server.py +0 -293
- sl_shared_assets-1.0.0rc13.dist-info/RECORD +0 -14
- /sl_shared_assets/{packaging_tools.py → tools/packaging_tools.py} +0 -0
- /sl_shared_assets/{transfer_tools.py → tools/transfer_tools.py} +0 -0
- {sl_shared_assets-1.0.0rc13.dist-info → sl_shared_assets-1.0.0rc15.dist-info}/WHEEL +0 -0
- {sl_shared_assets-1.0.0rc13.dist-info → sl_shared_assets-1.0.0rc15.dist-info}/entry_points.txt +0 -0
- {sl_shared_assets-1.0.0rc13.dist-info → sl_shared_assets-1.0.0rc15.dist-info}/licenses/LICENSE +0 -0
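The file list shows that 1.0.0rc15 reorganizes the previously flat modules (data_classes.py, server.py, suite2p.py, ascension_tools.py, packaging_tools.py, transfer_tools.py) into data_classes, server, suite2p, and tools subpackages. A hedged sketch of what imports against the new layout could look like follows; only the suite2p re-exports are confirmed by the __init__.py hunk shown later in this diff, while the server-level re-exports are assumptions, since server/__init__.py contents are not displayed here.

# Illustrative only: import paths implied by the 1.0.0rc15 package layout.
# The suite2p names are confirmed by the suite2p/__init__.py hunk below; the server names are
# assumed to be re-exported by the new server/__init__.py, which this diff does not show.
from sl_shared_assets.suite2p import MultiDayS2PConfiguration, SingleDayS2PConfiguration
from sl_shared_assets.server import Job, Server  # assumption: re-exported by the server subpackage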
@@ -0,0 +1,213 @@
"""This module provides the tools for working with the Sun lab BioHPC cluster. Specifically, the classes from this
module establish an API for submitting jobs to the shared data processing cluster (managed via SLURM) and monitoring
the running job status. All lab processing and analysis pipelines use this interface for accessing shared compute
resources.
"""

import time
from pathlib import Path
import tempfile
from dataclasses import dataclass

import paramiko

# noinspection PyProtectedMember
from simple_slurm import Slurm  # type: ignore
from paramiko.client import SSHClient
from ataraxis_base_utilities import LogLevel, console
from ataraxis_data_structures import YamlConfig

from .job import Job


def generate_server_credentials(
    output_directory: Path, username: str, password: str, host: str = "cbsuwsun.biohpc.cornell.edu"
) -> None:
    """Generates a new server_credentials.yaml file under the specified directory, using the input information.

    This function provides a convenience interface for generating new BioHPC server credential files. Generally, it is
    only used when setting up new host computers in the lab.
    """
    ServerCredentials(username=username, password=password, host=host).to_yaml(
        file_path=output_directory.joinpath("server_credentials.yaml")
    )


@dataclass()
class ServerCredentials(YamlConfig):
    """This class stores the hostname and credentials used to log into the BioHPC cluster to run Sun lab processing
    pipelines.

    Primarily, this is used as part of the sl-experiment library runtime to start data processing once it is
    transferred to the BioHPC server during preprocessing. However, the same file can be used together with the Server
    class API to run any computation jobs on the lab's BioHPC server.
    """

    username: str = "YourNetID"
    """The username to use for server authentication."""
    password: str = "YourPassword"
    """The password to use for server authentication."""
    host: str = "cbsuwsun.biohpc.cornell.edu"
    """The hostname or IP address of the server to connect to."""


class Server:
    """Encapsulates access to the Sun lab BioHPC processing server.

    This class provides the API that allows accessing the BioHPC server to create and submit various SLURM-managed jobs
    to the server. It functions as the central interface used by all processing pipelines in the lab to execute costly
    data processing on the server.

    Notes:
        All lab processing pipelines expect the data to be stored on the server and all processing logic to be packaged
        and installed into dedicated conda environments on the server.

        This class assumes that the target server has the SLURM job manager installed and accessible to the user whose
        credentials are used to connect to the server as part of this class instantiation.

    Args:
        credentials_path: The path to the locally stored .yaml file that contains the server hostname and access
            credentials.

    Attributes:
        _open: Tracks whether the connection to the server is open or not.
        _client: Stores the initialized SSHClient instance used to interface with the server.
    """

    def __init__(self, credentials_path: Path) -> None:
        # Tracker used to prevent __del__ from calling close() for a partially initialized class.
        self._open: bool = False

        # Loads the credentials from the provided .yaml file
        self._credentials: ServerCredentials = ServerCredentials.from_yaml(credentials_path)  # type: ignore

        # Establishes the SSH connection to the specified processing server. At most, attempts to connect to the server
        # 30 times before terminating with an error
        attempt = 0
        while True:
            console.echo(
                f"Trying to connect to {self._credentials.host} (attempt {attempt}/30)...", level=LogLevel.INFO
            )
            try:
                self._client: SSHClient = paramiko.SSHClient()
                self._client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                self._client.connect(
                    self._credentials.host, username=self._credentials.username, password=self._credentials.password
                )
                console.echo(f"Connected to {self._credentials.host}", level=LogLevel.SUCCESS)
                # Marks the connection as open so that close() (and __del__) can terminate it later.
                self._open = True
                break
            except paramiko.AuthenticationException:
                message = (
                    f"Authentication failed when connecting to {self._credentials.host} using "
                    f"{self._credentials.username} user."
                )
                console.error(message, RuntimeError)
                raise RuntimeError
            except:
                if attempt == 30:
                    message = f"Could not connect to {self._credentials.host} after 30 attempts. Aborting runtime."
                    console.error(message, RuntimeError)
                    raise RuntimeError

                console.echo(
                    f"Could not SSH to {self._credentials.host}, retrying after a 2-second delay...",
                    level=LogLevel.WARNING,
                )
                attempt += 1
                time.sleep(2)

    def __del__(self) -> None:
        """If the instance is connected to the server, terminates the connection before the instance is destroyed."""
        self.close()

    def submit_job(self, job: Job) -> Job:
        """Submits the input job to the managed BioHPC server via the SLURM job manager.

        This method submits various jobs for execution via the SLURM-managed BioHPC cluster. As part of its runtime,
        the method translates the Job object into a shell script, moves the script to the target working directory on
        the server, and instructs the server to execute the shell script (via SLURM).

        Args:
            job: The Job object that contains all job data.

        Returns:
            The job object whose 'job_id' attribute has been modified with the job ID, if the job was successfully
            submitted.

        Raises:
            RuntimeError: If job submission to the server fails.
        """

        # Generates a temporary shell script on the local machine. Uses tempfile to automatically remove the
        # local script as soon as it is uploaded to the server.
        with tempfile.TemporaryDirectory() as temp_dir:
            local_script_path = Path(temp_dir).joinpath(f"{job.job_name}.sh")
            fixed_script_content = job.command_script

            # Creates a temporary script file locally and dumps translated command data into the file
            with open(local_script_path, "w") as f:
                f.write(fixed_script_content)

            # Uploads the command script to the server
            sftp = self._client.open_sftp()
            sftp.put(localpath=local_script_path, remotepath=job.remote_script_path)
            sftp.close()

        # Makes the server-side script executable
        self._client.exec_command(f"chmod +x {job.remote_script_path}")

        # Submits the job to SLURM with sbatch and verifies submission state
        job_output = self._client.exec_command(f"sbatch {job.remote_script_path}")[1].read().strip().decode()

        # If 'Submitted batch job' is not in the output received from SLURM in response to issuing the submission
        # command, raises an error.
        if "Submitted batch job" not in job_output:
            message = f"Failed to submit the {job.job_name} job to the BioHPC cluster."
            console.error(message, RuntimeError)

            # Fallback to appease mypy, should not be reachable
            raise RuntimeError(message)

        # Otherwise, extracts the job id assigned to the job by SLURM from the response and writes it to the processed
        # Job object
        job_id = job_output.split()[-1]
        job.job_id = job_id
        return job

    def job_complete(self, job: Job) -> bool:
        """Returns True if the job managed by the input Job instance has been completed or terminated its runtime due
        to an error.

        If the job is still running or is waiting inside the execution queue, returns False.

        Args:
            job: The Job object whose status needs to be checked.

        Raises:
            ValueError: If the input Job object does not contain a valid job_id, suggesting that it has not been
                submitted to the server.
        """

        if job.job_id is None:
            message = (
                f"The input Job object for the job {job.job_name} does not contain a valid job_id. This indicates that "
                f"the job has not been submitted to the server."
            )
            console.error(message, ValueError)

            # This is here to appease mypy, it should not be reachable
            raise ValueError(message)

        if job.job_id not in self._client.exec_command(f"squeue -j {job.job_id}")[1].read().decode().strip():
            return True
        else:
            return False

    def close(self) -> None:
        """Closes the SSH connection to the server.

        This method has to be called before destroying the class instance to ensure proper resource cleanup.
        """
        # Prevents closing already closed connections
        if self._open:
            self._client.close()
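Taken together, generate_server_credentials, ServerCredentials, and Server support a simple submit-and-poll workflow. Below is a minimal, hedged sketch of that workflow; the Job constructor lives in sl_shared_assets/server/job.py and is not part of this hunk, so the helper accepts an already-built Job, the run_on_cluster name is hypothetical, and the import path assumes the server subpackage re-exports these names.

import time
from pathlib import Path

# Assumption: these names are re-exported by sl_shared_assets/server/__init__.py (not shown in this diff).
from sl_shared_assets.server import Job, Server, generate_server_credentials


def run_on_cluster(job: Job, credentials: Path) -> Job:
    """Hypothetical helper: submits a pre-configured Job and blocks until SLURM finishes it."""
    server = Server(credentials_path=credentials)
    job = server.submit_job(job)          # uploads the command script and calls 'sbatch' on the server
    while not server.job_complete(job):   # polls 'squeue' until the job leaves the queue
        time.sleep(30)
    server.close()                        # terminates the SSH connection
    return job


# One-time setup on a new host computer: writes server_credentials.yaml into the user's home directory.
generate_server_credentials(output_directory=Path.home(), username="netid123", password="...")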
@@ -0,0 +1,95 @@
from pathlib import Path
from dataclasses import dataclass

from simple_slurm import Slurm as Slurm
from paramiko.client import SSHClient as SSHClient
from ataraxis_data_structures import YamlConfig

from .job import Job as Job

def generate_server_credentials(
    output_directory: Path, username: str, password: str, host: str = "cbsuwsun.biohpc.cornell.edu"
) -> None:
    """Generates a new server_credentials.yaml file under the specified directory, using the input information.

    This function provides a convenience interface for generating new BioHPC server credential files. Generally, it is
    only used when setting up new host computers in the lab.
    """
@dataclass()
class ServerCredentials(YamlConfig):
    """This class stores the hostname and credentials used to log into the BioHPC cluster to run Sun lab processing
    pipelines.

    Primarily, this is used as part of the sl-experiment library runtime to start data processing once it is
    transferred to the BioHPC server during preprocessing. However, the same file can be used together with the Server
    class API to run any computation jobs on the lab's BioHPC server.
    """

    username: str = ...
    password: str = ...
    host: str = ...

class Server:
    """Encapsulates access to the Sun lab BioHPC processing server.

    This class provides the API that allows accessing the BioHPC server to create and submit various SLURM-managed jobs
    to the server. It functions as the central interface used by all processing pipelines in the lab to execute costly
    data processing on the server.

    Notes:
        All lab processing pipelines expect the data to be stored on the server and all processing logic to be packaged
        and installed into dedicated conda environments on the server.

        This class assumes that the target server has the SLURM job manager installed and accessible to the user whose
        credentials are used to connect to the server as part of this class instantiation.

    Args:
        credentials_path: The path to the locally stored .yaml file that contains the server hostname and access
            credentials.

    Attributes:
        _open: Tracks whether the connection to the server is open or not.
        _client: Stores the initialized SSHClient instance used to interface with the server.
    """

    _open: bool
    _credentials: ServerCredentials
    _client: SSHClient
    def __init__(self, credentials_path: Path) -> None: ...
    def __del__(self) -> None:
        """If the instance is connected to the server, terminates the connection before the instance is destroyed."""
    def submit_job(self, job: Job) -> Job:
        """Submits the input job to the managed BioHPC server via the SLURM job manager.

        This method submits various jobs for execution via the SLURM-managed BioHPC cluster. As part of its runtime,
        the method translates the Job object into a shell script, moves the script to the target working directory on
        the server, and instructs the server to execute the shell script (via SLURM).

        Args:
            job: The Job object that contains all job data.

        Returns:
            The job object whose 'job_id' attribute has been modified with the job ID, if the job was successfully
            submitted.

        Raises:
            RuntimeError: If job submission to the server fails.
        """
    def job_complete(self, job: Job) -> bool:
        """Returns True if the job managed by the input Job instance has been completed or terminated its runtime due
        to an error.

        If the job is still running or is waiting inside the execution queue, returns False.

        Args:
            job: The Job object whose status needs to be checked.

        Raises:
            ValueError: If the input Job object does not contain a valid job_id, suggesting that it has not been
                submitted to the server.
        """
    def close(self) -> None:
        """Closes the SSH connection to the server.

        This method has to be called before destroying the class instance to ensure proper resource cleanup.
        """
@@ -0,0 +1,8 @@
"""This package provides the configuration classes used by the Sun lab maintained version of the suite2p library
(sl-suite2p package, https://github.com/Sun-Lab-NBB/suite2p) to process brain activity data within and across sessions
(days)."""

from .multi_day import MultiDayS2PConfiguration
from .single_day import SingleDayS2PConfiguration

__all__ = ["MultiDayS2PConfiguration", "SingleDayS2PConfiguration"]
@@ -0,0 +1,193 @@
"""This module stores the classes used to configure the multi-day (across-session) sl-suite2p pipeline. This pipeline
extends the original suite2p code to support tracking the same objects (cells) across multiple days. Both single-day
(original) and multi-day (extended) pipelines are available as part of the Sun lab maintained sl-suite2p package."""

from typing import Any
from dataclasses import field, asdict, dataclass

from ataraxis_data_structures import YamlConfig


@dataclass()
class IO:
    """Stores parameters that control data input and output during various stages of the pipeline."""

    sessions: list[str] = field(default_factory=list)
    """Specifies the list of sessions to register across days, as absolute paths to their /suite2p directories,
    e.g.: root/project/animal/session/processed_data/suite2p. The suite2p directory is created as part of the
    'single-day' suite2p runtime, assuming the default value of the 'save_folder' SingleDayS2PConfiguration class
    attribute was not modified. Note, each suite2p directory has to contain the 'combined' plane folder, which is
    created if the 'combined' SingleDayS2PConfiguration class attribute is 'True'."""

    mesoscan: bool = True
    """Indicates whether the processed session /suite2p folders contain registered Mesoscope frames."""


@dataclass()
class CellDetection:
    """Stores parameters for selecting single-day-registered cells (ROIs) to be tracked across multiple sessions (days).

    To maximize the tracking pipeline reliability, it is beneficial to pre-filter the cells whose identity (as cells)
    is not certain or that may be hard to track across sessions.
    """

    probability_threshold: float = 0.85
    """The minimum required probability score assigned to the cell (ROI) by the suite2p classifier. Cells with a lower
    classifier score are excluded from processing."""

    maximum_size: int = 1000
    """The maximum allowed cell (ROI) size, in pixels. Cells with a larger pixel size are excluded from processing."""

    mesoscope_stripe_borders: list[int] = field(default_factory=list)
    """Stores the x-coordinates of mesoscope combined image stripe (ROI) borders. For mesoscope images, 'stripes' are
    the individual imaging ROIs acquired in the 'multiple-ROI' mode. If this field is not overwritten by the user, the
    pipeline will read the border data from the combined plane 'ops.npy' file generated by the single-day suite2p
    pipeline."""

    stripe_margin: int = 30
    """The minimum required distance, in pixels, between the center-point (the median x-coordinate) of the cell (ROI)
    and the mesoscope stripe border. Cells that are too close to stripe borders are excluded from processing to avoid
    ambiguities associated with tracking cells that span multiple stripes."""


@dataclass()
class Registration:
    """Stores parameters for aligning (registering) the sessions from multiple days to the same visual space.

    Registration is used to create a 'shared' visual space, making it possible to track the same cells (ROIs) across
    the otherwise variable visual space of each session.
    """

    image_type: str = "enhanced"
    """The type of single-day suite2p-generated image to use for across-day registration. Supported options are
    'enhanced', 'mean' and 'max'. This 'template' image is used to calculate the necessary deformation (transformations)
    to register (align) all sessions to the same visual space."""

    grid_sampling_factor: float = 1
    """Determines to what extent the grid sampling scales with the deformed image scale. Has to be between 0 and 1. By
    making this value lower than 1, the grid is relatively fine at the higher scales, allowing for more deformations.
    This is used when resizing session images as part of the registration process."""

    scale_sampling: int = 30
    """The number of iterations for each level (i.e. between each factor two in scale) to perform when computing the
    deformations. Values between 20 and 30 are reasonable in most situations, but higher values yield better results in
    general. The speed of the algorithm scales linearly with this value."""

    speed_factor: float = 3
    """The relative force of the deformation transform applied when registering the sessions to the same visual space.
    This is the most important parameter to tune."""


@dataclass()
class Clustering:
    """Stores parameters for clustering cell (ROI) masks across multiple registered sessions.

    Clustering is used to track cells across sessions. If a group of ROIs across sessions is clustered together, it
    is likely that they represent the same cell (ROI) across all sessions. This process involves first creating a
    'template' mask that tracks a cell using the registered (deformed) visual space and then using this template to
    track the cell in the original (non-deformed) visual space of each session.
    """

    criterion: str = "distance"
    """Specifies the criterion for clustering (grouping) cell (ROI) masks from different sessions. Currently, the only
    valid option is 'distance'."""

    threshold: float = 0.75
    """Specifies the threshold for the clustering algorithm. Cell masks will be clustered (grouped) together if their
    clustering criterion is below this threshold value."""

    mask_prevalence: int = 50
    """Specifies the minimum percentage of all registered sessions that must include the clustered cell mask. Cell masks
    present in a smaller percentage of sessions than this value are excluded from processing. This parameter is used to
    isolate the cells that are present (active) across sessions."""

    pixel_prevalence: int = 50
    """Specifies the minimum percentage of all registered sessions in which a pixel from a given cell mask must be
    present for it to be used to construct the template mask. Pixels present in a smaller percentage of sessions than
    this value are not used to define the 'template' mask coordinates. Template masks are used to extract the cell
    fluorescence from the 'original' visual space of every session. This parameter is used to isolate the part of the
    cell that is stable across sessions."""

    step_sizes: list[int] = field(default_factory=lambda: [200, 200])
    """Specifies the block size for the clustering process, in pixels. Clustering is applied in blocks of this size,
    sampled across the processed plane image, to reduce the memory (RAM) overhead."""

    bin_size: int = 50
    """Specifies the size of bins used to discover cell masks within blocks during clustering. To avoid edge cases, the
    algorithm clusters the cell masks within the region defined by the center-point of each cell +- bin_size."""

    maximum_distance: int = 20
    """Specifies the maximum distance, in pixels, that can separate masks across multiple sessions. The clustering
    algorithm will consider cell masks located at most within this distance from each other across days as the same
    cells during tracking."""

    minimum_size: int = 25
    """The minimum size of the non-overlapping (with other cells) cell (ROI) region, in pixels, that has to be covered
    by the template mask, for the cell to be assigned to that template. This is used to determine which template(s) the
    cell belongs to (if any), for the purpose of tracking it across sessions."""


@dataclass()
class Demix:
    """Stores settings used to deconvolve fluorescence signals from cells tracked across multiple days.

    This step applies the suite2p spike deconvolution algorithm to the cell masks isolated during clustering to extract
    the fluorescence of the cells tracked across multiple sessions (days). Generally, it should use the same parameters
    as were used by the single-day suite2p pipeline.
    """

    baseline: str = "maximin"
    """Specifies the method to compute the baseline of each trace. This baseline is then subtracted from each cell.
    'maximin' computes a moving baseline by filtering the data with a Gaussian of width 'sig_baseline' * 'fs', and then
    minimum filtering with a window of 'win_baseline' * 'fs', and then maximum filtering with the same window.
    'constant' computes a constant baseline by filtering with a Gaussian of width 'sig_baseline' * 'fs' and then taking
    the minimum value of this filtered trace. 'constant_percentile' computes a constant baseline by taking the
    'prctile_baseline' percentile of the trace."""

    win_baseline: float = 60.0
    """The time window, in seconds, over which to compute the baseline filter."""

    sig_baseline: float = 10.0
    """The standard deviation, in seconds, of the Gaussian filter applied to smooth the baseline signal."""

    l2_reg: float = 0.1
    """The L2 regularization strength applied during spike deconvolution."""

    neucoeff: float = 0.7
    """The neuropil coefficient applied for signal correction before deconvolution."""


@dataclass()
class MultiDayS2PConfiguration(YamlConfig):
    """Aggregates all parameters for the multi-day suite2p pipeline used to track cells across multiple days
    (sessions) and extract their activity.

    These settings are used to configure the multiday suite2p extraction pipeline, which is based on the reference
    implementation here: https://github.com/sprustonlab/multiday-suite2p-public. This class behaves similarly to the
    SingleDayS2PConfiguration class. It can be saved to and loaded from a .YAML file and translated to the dictionary
    format expected by the multi-day sl-suite2p pipeline.
    """

    cell_detection: CellDetection = field(default_factory=CellDetection)
    """Stores parameters for selecting single-day-registered cells (ROIs) to be tracked across multiple sessions
    (days)."""
    registration: Registration = field(default_factory=Registration)
    """Stores parameters for aligning (registering) the sessions from multiple days to the same visual space."""
    clustering: Clustering = field(default_factory=Clustering)
    """Stores parameters for clustering (tracking) cell (ROI) masks across multiple registered sessions."""
    demix: Demix = field(default_factory=Demix)
    """Stores settings used to deconvolve fluorescence signals from cells tracked across multiple days."""
    io: IO = field(default_factory=IO)
    """Stores parameters that control data input and output during various stages of the pipeline."""

    def to_ops(self) -> dict[str, Any]:
        """Converts the class instance to a dictionary and returns it to the caller.

        This dictionary can be passed to sl-suite2p multi-day functions as the 'ops' argument.

        Notes:
            Unlike the single-day configuration class, the dictionary generated by this method uses section names as
            top-level keys and parameter names as second-level keys. This mimics the original multiday-pipeline
            configuration scheme.
        """
        return asdict(self)
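As the docstrings above describe, MultiDayS2PConfiguration is meant to be round-tripped through YAML (via the YamlConfig base class, the same to_yaml()/from_yaml() pair used by ServerCredentials earlier in this diff) and then flattened into an 'ops' dictionary for the multi-day pipeline. A minimal sketch follows, with placeholder session paths; the sl-suite2p functions that consume the resulting dictionary are not part of this diff.

from pathlib import Path

# Re-export confirmed by the suite2p/__init__.py hunk above.
from sl_shared_assets.suite2p import MultiDayS2PConfiguration

# Builds a configuration, overriding only the fields that differ from the defaults.
config = MultiDayS2PConfiguration()
config.cell_detection.probability_threshold = 0.9
config.io.sessions = [
    "/server/project/animal/session_01/processed_data/suite2p",  # placeholder paths
    "/server/project/animal/session_02/processed_data/suite2p",
]

# Round-trips the configuration through YAML via the YamlConfig base class.
config.to_yaml(file_path=Path("multi_day_s2p_configuration.yaml"))
restored = MultiDayS2PConfiguration.from_yaml(Path("multi_day_s2p_configuration.yaml"))  # type: ignore

# to_ops() nests parameters under their section names, mirroring the original multiday-pipeline scheme.
ops = restored.to_ops()
assert ops["io"]["mesoscan"] is True
assert ops["clustering"]["criterion"] == "distance"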
@@ -0,0 +1,99 @@
from typing import Any
from dataclasses import field, dataclass

from _typeshed import Incomplete
from ataraxis_data_structures import YamlConfig

@dataclass()
class IO:
    """Stores parameters that control data input and output during various stages of the pipeline."""

    sessions: list[str] = field(default_factory=list)
    mesoscan: bool = ...

@dataclass()
class CellDetection:
    """Stores parameters for selecting single-day-registered cells (ROIs) to be tracked across multiple sessions (days).

    To maximize the tracking pipeline reliability, it is beneficial to pre-filter the cells whose identity (as cells)
    is not certain or that may be hard to track across sessions.
    """

    probability_threshold: float = ...
    maximum_size: int = ...
    mesoscope_stripe_borders: list[int] = field(default_factory=list)
    stripe_margin: int = ...

@dataclass()
class Registration:
    """Stores parameters for aligning (registering) the sessions from multiple days to the same visual space.

    Registration is used to create a 'shared' visual space, making it possible to track the same cells (ROIs) across
    the otherwise variable visual space of each session.
    """

    image_type: str = ...
    grid_sampling_factor: float = ...
    scale_sampling: int = ...
    speed_factor: float = ...

@dataclass()
class Clustering:
    """Stores parameters for clustering cell (ROI) masks across multiple registered sessions.

    Clustering is used to track cells across sessions. If a group of ROIs across sessions is clustered together, it
    is likely that they represent the same cell (ROI) across all sessions. This process involves first creating a
    'template' mask that tracks a cell using the registered (deformed) visual space and then using this template to
    track the cell in the original (non-deformed) visual space of each session.
    """

    criterion: str = ...
    threshold: float = ...
    mask_prevalence: int = ...
    pixel_prevalence: int = ...
    step_sizes: list[int] = field(default_factory=Incomplete)
    bin_size: int = ...
    maximum_distance: int = ...
    minimum_size: int = ...

@dataclass()
class Demix:
    """Stores settings used to deconvolve fluorescence signals from cells tracked across multiple days.

    This step applies the suite2p spike deconvolution algorithm to the cell masks isolated during clustering to extract
    the fluorescence of the cells tracked across multiple sessions (days). Generally, it should use the same parameters
    as were used by the single-day suite2p pipeline.
    """

    baseline: str = ...
    win_baseline: float = ...
    sig_baseline: float = ...
    l2_reg: float = ...
    neucoeff: float = ...

@dataclass()
class MultiDayS2PConfiguration(YamlConfig):
    """Aggregates all parameters for the multi-day suite2p pipeline used to track cells across multiple days
    (sessions) and extract their activity.

    These settings are used to configure the multiday suite2p extraction pipeline, which is based on the reference
    implementation here: https://github.com/sprustonlab/multiday-suite2p-public. This class behaves similarly to the
    SingleDayS2PConfiguration class. It can be saved to and loaded from a .YAML file and translated to the dictionary
    format expected by the multi-day sl-suite2p pipeline.
    """

    cell_detection: CellDetection = field(default_factory=CellDetection)
    registration: Registration = field(default_factory=Registration)
    clustering: Clustering = field(default_factory=Clustering)
    demix: Demix = field(default_factory=Demix)
    io: IO = field(default_factory=IO)
    def to_ops(self) -> dict[str, Any]:
        """Converts the class instance to a dictionary and returns it to the caller.

        This dictionary can be passed to sl-suite2p multi-day functions as the 'ops' argument.

        Notes:
            Unlike the single-day configuration class, the dictionary generated by this method uses section names as
            top-level keys and parameter names as second-level keys. This mimics the original multiday-pipeline
            configuration scheme.
        """