sl-shared-assets 1.0.0rc14__py3-none-any.whl → 1.0.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic. Click here for more details.

Files changed (33)
  1. sl_shared_assets/__init__.py +21 -9
  2. sl_shared_assets/cli.py +1 -1
  3. sl_shared_assets/data_classes/__init__.py +63 -0
  4. sl_shared_assets/data_classes/configuration_data.py +64 -0
  5. sl_shared_assets/data_classes/runtime_data.py +233 -0
  6. sl_shared_assets/{data_classes.py → data_classes/session_data.py} +60 -481
  7. sl_shared_assets/data_classes/surgery_data.py +152 -0
  8. sl_shared_assets/server/__init__.py +8 -0
  9. sl_shared_assets/server/job.py +140 -0
  10. sl_shared_assets/server/server.py +213 -0
  11. sl_shared_assets/suite2p/__init__.py +8 -0
  12. sl_shared_assets/suite2p/multi_day.py +225 -0
  13. sl_shared_assets/suite2p/single_day.py +563 -0
  14. sl_shared_assets/tools/__init__.py +8 -0
  15. sl_shared_assets/{ascension_tools.py → tools/ascension_tools.py} +3 -2
  16. {sl_shared_assets-1.0.0rc14.dist-info → sl_shared_assets-1.0.0rc16.dist-info}/METADATA +1 -1
  17. sl_shared_assets-1.0.0rc16.dist-info/RECORD +23 -0
  18. sl_shared_assets/__init__.pyi +0 -51
  19. sl_shared_assets/ascension_tools.pyi +0 -68
  20. sl_shared_assets/cli.pyi +0 -28
  21. sl_shared_assets/data_classes.pyi +0 -799
  22. sl_shared_assets/packaging_tools.pyi +0 -52
  23. sl_shared_assets/server.py +0 -300
  24. sl_shared_assets/server.pyi +0 -117
  25. sl_shared_assets/suite2p.py +0 -456
  26. sl_shared_assets/suite2p.pyi +0 -188
  27. sl_shared_assets/transfer_tools.pyi +0 -53
  28. sl_shared_assets-1.0.0rc14.dist-info/RECORD +0 -22
  29. /sl_shared_assets/{packaging_tools.py → tools/packaging_tools.py} +0 -0
  30. /sl_shared_assets/{transfer_tools.py → tools/transfer_tools.py} +0 -0
  31. {sl_shared_assets-1.0.0rc14.dist-info → sl_shared_assets-1.0.0rc16.dist-info}/WHEEL +0 -0
  32. {sl_shared_assets-1.0.0rc14.dist-info → sl_shared_assets-1.0.0rc16.dist-info}/entry_points.txt +0 -0
  33. {sl_shared_assets-1.0.0rc14.dist-info → sl_shared_assets-1.0.0rc16.dist-info}/licenses/LICENSE +0 -0
@@ -1,52 +0,0 @@
1
- from pathlib import Path
2
-
3
def _calculate_file_checksum(base_directory: Path, file_path: Path) -> tuple[str, bytes]:
    """Calculates xxHash3-128 checksum for a single file and its path relative to the base directory.

    This function is passed to parallel workers used by the calculate_directory_checksum() function that iteratively
    calculates the checksum for all files inside a directory. Each call to this function returns the checksum for the
    target file, which includes both the contents of the file and its path relative to the base directory.

    Args:
        base_directory: The path to the base (root) directory which is being checksummed by the main
            'calculate_directory_checksum' function.
        file_path: The absolute path to the target file.

    Returns:
        A tuple with two elements. The first element is the path to the file relative to the base directory. The second
        element is the xxHash3-128 checksum that covers the relative path and the contents of the file.
    """
19
-
20
def calculate_directory_checksum(
    directory: Path, num_processes: int | None = None, batch: bool = False, save_checksum: bool = True
) -> str:
    """Calculates xxHash3-128 checksum for the input directory, which includes the data of all contained files and
    the directory structure information.

    This function is used to generate a checksum for the raw_data directory of each experiment or training session.
    Checksums are used to verify the session data integrity during transmission between the PC that acquired the data
    and long-term storage locations, such as the Synology NAS or the BioHPC server. The function can be configured to
    write the generated checksum as a hexadecimal string to the ax_checksum.txt file stored at the highest level of the
    input directory.

    Note:
        This function uses multiprocessing to efficiently parallelize checksum calculation for multiple files. In
        combination with xxHash3, this achieves a significant speedup over more common checksums, such as MD5 and
        SHA256. Note that xxHash3 is not suitable for security purposes and is only used to ensure data integrity.

        The function notifies the user about the checksum calculation process via the terminal.

        The returned checksum accounts for both the contents of each file and the layout of the input directory
        structure.

    Args:
        directory: The Path to the directory to be checksummed.
        num_processes: The number of CPU processes to use for parallelizing checksum calculation. If set to None, the
            function defaults to using (logical CPU count - 4).
        batch: Determines whether the function is called as part of batch-processing multiple directories. This is used
            to optimize progress reporting to avoid cluttering the terminal.
        save_checksum: Determines whether the checksum should be saved (written to) the ax_checksum.txt file stored at
            the highest level of the input directory.

    Returns:
        The xxHash3-128 checksum for the input directory as a hexadecimal string.
    """
@@ -1,300 +0,0 @@
1
- """This module provides the tools for working with the Sun lab BioHPC cluster. Specifically, the classes from this
2
- module establish an API for submitting jobs to the shared data processing cluster (managed via SLURM) and monitoring
3
- the running job status. All lab processing and analysis pipelines use this interface for accessing shared compute
4
- resources.
5
- """
6
-
7
- import re
8
- import time
9
- from pathlib import Path
10
- import datetime
11
- from dataclasses import dataclass
12
-
13
- import paramiko
14
-
15
- # noinspection PyProtectedMember
16
- from simple_slurm import Slurm # type: ignore
17
- from paramiko.client import SSHClient
18
- from ataraxis_base_utilities import LogLevel, console
19
- from ataraxis_data_structures import YamlConfig
20
-
21
-
22
def generate_server_credentials(
    output_directory: Path, username: str, password: str, host: str = "cbsuwsun.biohpc.cornell.edu"
) -> None:
    """Writes a new server_credentials.yaml file under the specified directory, using the input information.

    This is a convenience interface for creating BioHPC server credential files, which is typically only needed when
    setting up new host-computers in the lab.

    Args:
        output_directory: The directory under which the server_credentials.yaml file is created.
        username: The username to store in the credentials file.
        password: The password to store in the credentials file.
        host: The hostname or IP address of the server to store in the credentials file.
    """
    # Packages the access data into the credentials dataclass before resolving the output file path.
    credentials = ServerCredentials(username=username, password=password, host=host)
    credentials_file = output_directory.joinpath("server_credentials.yaml")

    # Dumps the credentials to disk as a .yaml file.
    credentials.to_yaml(file_path=credentials_file)
33
-
34
-
35
@dataclass
class ServerCredentials(YamlConfig):
    """Stores the hostname and the credentials used to log into the BioHPC cluster that runs Sun lab processing
    pipelines.

    The primary consumer of this class is the sl-experiment library runtime, which uses it to start data processing
    once the data is transferred to the BioHPC server during preprocessing. The same file can also be used together
    with the Server class API to run any computation jobs on the lab's BioHPC server.
    """

    username: str = "YourNetID"
    """The username used to authenticate with the server."""
    password: str = "YourPassword"
    """The password used to authenticate with the server."""
    host: str = "cbsuwsun.biohpc.cornell.edu"
    """The hostname or IP address of the server to which to connect."""
51
-
52
-
53
class Server:
    """Encapsulates access to the Sun lab BioHPC processing server.

    This class provides the API that allows accessing the BioHPC server to create and submit various SLURM-managed jobs
    to the server. It functions as the central interface used by all processing pipelines in the lab to execute costly
    data processing on the server.

    Notes:
        All lab processing pipelines expect the data to be stored on the server and all processing logic to be packaged
        and installed into dedicated conda environments on the server.

        This class assumes that the target server has SLURM job manager installed and accessible to the user whose
        credentials are used to connect to the server as part of this class instantiation.

    Args:
        credentials_path: The path to the locally stored .yaml file that contains the server hostname and access
            credentials.

    Attributes:
        _open: Tracks whether the connection to the server is open or not.
        _credentials: Stores the ServerCredentials instance loaded from the credentials .yaml file.
        _client: Stores the initialized SSHClient instance used to interface with the server.

    Raises:
        RuntimeError: If the server rejects the credentials or cannot be reached after 30 connection attempts.
    """

    def __init__(self, credentials_path: Path) -> None:
        # Tracker used to prevent __del__ from calling close() on a partially initialized instance. It is only set to
        # True once the SSH connection is established.
        self._open: bool = False

        # Loads the credentials from the provided .yaml file
        self._credentials: ServerCredentials = ServerCredentials.from_yaml(credentials_path)  # type: ignore

        # Establishes the SSH connection to the specified processing server. At most, attempts to connect to the server
        # 30 times before terminating with an error
        max_attempts = 30
        attempt = 1  # Attempts are counted from 1, so that exactly max_attempts connections are tried.
        while True:
            console.echo(
                f"Trying to connect to {self._credentials.host} (attempt {attempt}/{max_attempts})...",
                level=LogLevel.INFO,
            )
            try:
                self._client: SSHClient = paramiko.SSHClient()
                self._client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                self._client.connect(
                    self._credentials.host, username=self._credentials.username, password=self._credentials.password
                )
                # Marks the connection as open, so that close() actually releases it.
                self._open = True
                console.echo(f"Connected to {self._credentials.host}", level=LogLevel.SUCCESS)
                break
            except paramiko.AuthenticationException as error:
                # Retrying with the same (rejected) credentials is pointless, so this aborts immediately.
                message = (
                    f"Authentication failed when connecting to {self._credentials.host} using "
                    f"{self._credentials.username} user."
                )
                console.error(message, RuntimeError)
                raise RuntimeError(message) from error  # Fallback to appease mypy, should not be reachable
            except Exception as error:
                # Only catches Exception subclasses, so KeyboardInterrupt and SystemExit still abort the retry loop.
                if attempt >= max_attempts:
                    message = (
                        f"Could not connect to {self._credentials.host} after {max_attempts} attempts. "
                        f"Aborting runtime."
                    )
                    console.error(message, RuntimeError)
                    raise RuntimeError(message) from error  # Fallback to appease mypy, should not be reachable

                console.echo(
                    f"Could not SSH to {self._credentials.host}, retrying after a 2-second delay...",
                    level=LogLevel.WARNING,
                )
                attempt += 1
                time.sleep(2)

    def __del__(self) -> None:
        """If the instance is connected to the server, terminates the connection before the instance is destroyed."""
        self.close()

    @staticmethod
    def generate_slurm_header(
        job_name: str, output_log: Path, error_log: Path, cpus_to_use: int = 10, ram_gb: int = 10, time_limit: int = 60
    ) -> Slurm:
        """Creates a SLURM command object and fills it with initial job configuration data.

        This method is used to generate the initial SLURM command object and fill it with job (SLURM) configuration and
        conda shell initialization data. It is used by all processing pipelines in the lab as the initial configuration
        point when writing job shell scripts.

        Notes:
            The command header generated by this method does not contain the command to initialize the specific conda
            environment to be used during processing. This has to be provided as part of the additional command
            configuration, typically by adding the "source activate {ENV_NAME}" subcommand to the end of the header
            returned by this method.

        Args:
            job_name: The descriptive name of the SLURM job to be created.
            output_log: The absolute path to the .txt file on the processing server, where to store the standard output
                data of the job.
            error_log: The absolute path to the .txt file on the processing server, where to store the standard error
                data of the job.
            cpus_to_use: The number of CPUs to use for the job.
            ram_gb: The amount of RAM to allocate for the job in Gigabytes.
            time_limit: The maximum time limit for the job, in minutes. It is highly advised to set an adequate maximum
                runtime limit to prevent jobs from hogging the server for a long period of time.

        Returns:
            The Slurm command object that contains the job configuration and the conda shell initialization commands.
        """

        # Builds the slurm command object filled with configuration information. The log paths live on the server, so
        # they are rendered with forward slashes regardless of the operating system of the submitting machine.
        slurm_command = Slurm(
            cpus_per_task=cpus_to_use,
            job_name=job_name,
            output=Path(output_log).as_posix(),
            error=Path(error_log).as_posix(),
            mem=f"{ram_gb}G",
            time=datetime.timedelta(minutes=time_limit),
        )

        # Adds commands to initialize conda shell as part of the job runtime
        slurm_command.add_cmd("eval $(conda shell.bash hook)")
        slurm_command.add_cmd("conda init bash")

        return slurm_command

    def submit_job(self, slurm_command: Slurm, working_directory: Path) -> str:
        """Submits the input SLURM command to the managed BioHPC server via the shell script.

        This method submits various commands for execution via SLURM-managed BioHPC cluster. As part of its runtime, the
        method translates the Slurm object into the shell script, moves the script to the target working directory on
        the server, and instructs the server to execute the shell script (via SLURM).

        Notes:
            This method appends an "rm -f" command for the uploaded script to the input Slurm object, so that the
            script removes itself from the server at the end of the job runtime.

        Args:
            slurm_command: The Slurm (command) object containing the job configuration and individual commands to run
                as part of the processing pipeline.
            working_directory: The path to the working directory on the server where the shell script is moved
                and executed.

        Returns:
            The job ID assigned to the job by SLURM manager if the command submission is successful.

        Raises:
            RuntimeError: If the command submission to the server fails.
        """
        # Imported locally, as it is only needed to stage the script file before the upload.
        import tempfile

        # Extracts the job name from the slurm command text and uses it to generate the name for the remote script
        job_name_pattern = r"#SBATCH\s+--job-name\s+(\S+)"
        match = re.search(job_name_pattern, str(slurm_command))
        if match is None:
            message = (
                f"Failed to submit the job to the BioHPC cluster. It appears that the job does not contain the "
                f"expected SLURM job header. All jobs submitted via this method have to be initialized using the "
                f"generate_slurm_header() class method."
            )
            console.error(message, RuntimeError)
            raise RuntimeError(message)  # This is a fallback to appease mypy. It should not be reachable.
        job_name = match.group(1)

        # Resolves the path to the remote (server-side) .sh script file. The path is rendered with forward slashes, as
        # it is interpreted by the server and not by the local (potentially Windows) machine.
        remote_script_path = Path(working_directory).joinpath(f"{job_name}.sh").as_posix()

        # Appends the command to clean up (remove) the temporary script file after processing runtime is over
        slurm_command.add_cmd(f"rm -f {remote_script_path}")

        # Translates the command to string format and replaces escaped $ (\$) with $. This is essential, as without
        # this correction things like conda initialization would not work as expected.
        script_content = str(slurm_command).replace("\\$", "$")

        # Stages the script inside a private temporary directory. This avoids collisions between concurrent
        # submissions, does not depend on the current working directory being writable, and guarantees that the local
        # copy is removed even if the upload fails.
        with tempfile.TemporaryDirectory() as staging_directory:
            local_script_path = Path(staging_directory).joinpath(f"{job_name}.sh")

            # Forces Unix line endings, as the script is executed by the shell on the server.
            with open(local_script_path, "w", encoding="utf-8", newline="\n") as script_file:
                script_file.write(script_content)

            # Uploads the command script to the server, releasing the SFTP session regardless of the outcome
            sftp = self._client.open_sftp()
            try:
                sftp.put(localpath=str(local_script_path), remotepath=remote_script_path)
            finally:
                sftp.close()

        # Makes the server-side script executable
        self._client.exec_command(f"chmod +x {remote_script_path}")

        # Submits the job to SLURM with sbatch and verifies the submission state. Successful submissions return the ID
        # of the job, failed submissions raise an error.
        job_output = self._client.exec_command(f"sbatch {remote_script_path}")[1].read().strip().decode()
        if "Submitted batch job" in job_output:
            return job_output.split()[-1]

        message = f"Failed to submit the {job_name} job to the BioHPC cluster."
        console.error(message, RuntimeError)

        # Fallback to appease mypy, should not be reachable
        raise RuntimeError(message)

    def job_complete(self, job_id: str) -> bool:
        """Returns True if the job with the given ID has been completed or terminated its runtime due to an error.

        If the job is still running or is waiting inside the execution queue, returns False.

        Args:
            job_id: The numeric ID of the job to check, assigned by SLURM.
        """
        # The job is considered complete once its ID no longer appears in the text returned by squeue.
        queue_listing = self._client.exec_command(f"squeue -j {job_id}")[1].read().decode().strip()
        return job_id not in queue_listing

    def close(self) -> None:
        """Closes the SSH connection to the server.

        This method has to be called before destroying the class instance to ensure proper resource cleanup. Calling
        it more than once is safe.
        """
        # Prevents closing already closed (or never opened) connections
        if self._open:
            self._client.close()
            self._open = False
261
-
262
-
263
if __name__ == "__main__":
    # Manual smoke test: connects to the server, submits a trivial job, and polls until it finishes or the wait
    # budget below is exhausted. The credentials path is specific to the original developer's machine.
    console.enable()
    cred_path = Path("/home/cyberaxolotl/Desktop/test/server_credentials.yaml")
    server = Server(credentials_path=cred_path)

    # Guarantees that the SSH connection is released even if job submission or status polling raises an error.
    try:
        # Generates SLURM job header
        slurm = server.generate_slurm_header(
            job_name="test_job",
            output_log=Path("/workdir/cbsuwsun/test_job_stdout.txt"),
            error_log=Path("/workdir/cbsuwsun/test_job_stderr.txt"),
            cpus_to_use=1,
        )

        # Adds test runtime command
        slurm.add_cmd("python --version > /workdir/cbsuwsun/mamba_version.txt")

        # Submits the job to the server
        j_id = server.submit_job(slurm_command=slurm, working_directory=Path("/workdir/cbsuwsun/"))

        if j_id:
            console.echo(f"Successfully submitted job with ID {j_id} to the server.", level=LogLevel.SUCCESS)

        max_wait_time = 60  # Maximum wait time in seconds
        wait_interval = 1  # Check every 1 second
        elapsed_time = 0

        while elapsed_time < max_wait_time:
            if server.job_complete(job_id=j_id):
                console.echo("Job completed", level=LogLevel.SUCCESS)
                break

            console.echo(f"Job still running. Waiting {wait_interval} seconds...", level=LogLevel.INFO)
            time.sleep(wait_interval)
            elapsed_time += wait_interval
    finally:
        # Close the connection
        server.close()
@@ -1,117 +0,0 @@
1
- from pathlib import Path
2
- from dataclasses import dataclass
3
-
4
- from simple_slurm import Slurm
5
- from paramiko.client import SSHClient as SSHClient
6
- from ataraxis_data_structures import YamlConfig
7
-
8
def generate_server_credentials(
    output_directory: Path, username: str, password: str, host: str = "cbsuwsun.biohpc.cornell.edu"
) -> None:
    """Generates a new server_credentials.yaml file under the specified directory, using input information.

    This function provides a convenience interface for generating new BioHPC server credential files. Generally, this is
    only used when setting up new host-computers in the lab.

    Args:
        output_directory: The directory under which the server_credentials.yaml file is generated.
        username: The username to store in the generated credentials file.
        password: The password to store in the generated credentials file.
        host: The hostname or IP address of the server to store in the generated credentials file.
    """
16
@dataclass()
class ServerCredentials(YamlConfig):
    """This class stores the hostname and credentials used to log into the BioHPC cluster to run Sun lab processing
    pipelines.

    Primarily, this is used as part of the sl-experiment library runtime to start data processing once it is
    transferred to the BioHPC server during preprocessing. However, the same file can be used together with the Server
    class API to run any computation jobs on the lab's BioHPC server.
    """

    # The username to use for server authentication.
    username: str = ...
    # The password to use for server authentication.
    password: str = ...
    # The hostname or IP address of the server to connect to.
    host: str = ...
29
-
30
class Server:
    """Encapsulates access to the Sun lab BioHPC processing server.

    This class provides the API that allows accessing the BioHPC server to create and submit various SLURM-managed jobs
    to the server. It functions as the central interface used by all processing pipelines in the lab to execute costly
    data processing on the server.

    Notes:
        All lab processing pipelines expect the data to be stored on the server and all processing logic to be packaged
        and installed into dedicated conda environments on the server.

        This class assumes that the target server has SLURM job manager installed and accessible to the user whose
        credentials are used to connect to the server as part of this class instantiation.

    Args:
        credentials_path: The path to the locally stored .yaml file that contains the server hostname and access
            credentials.

    Attributes:
        _open: Tracks whether the connection to the server is open or not.
        _credentials: Stores the ServerCredentials instance that holds the server hostname and access credentials.
        _client: Stores the initialized SSHClient instance used to interface with the server.
    """

    _open: bool
    _credentials: ServerCredentials
    _client: SSHClient
    def __init__(self, credentials_path: Path) -> None: ...
    def __del__(self) -> None:
        """If the instance is connected to the server, terminates the connection before the instance is destroyed."""
    @staticmethod
    def generate_slurm_header(
        job_name: str, output_log: Path, error_log: Path, cpus_to_use: int = 10, ram_gb: int = 10, time_limit: int = 60
    ) -> Slurm:
        """Creates a SLURM command object and fills it with initial job configuration data.

        This method is used to generate the initial SLURM command object and fill it with job (SLURM) configuration and
        conda shell initialization data. It is used by all processing pipelines in the lab as the initial configuration
        point when writing job shell scripts.

        Notes:
            The command header generated by this method does not contain the command to initialize the specific conda
            environment to be used during processing. This has to be provided as part of the additional command
            configuration, typically by adding the "source activate {ENV_NAME}" subcommand to the end of the header
            returned by this method.

        Args:
            job_name: The descriptive name of the SLURM job to be created.
            output_log: The absolute path to the .txt file on the processing server, where to store the standard output
                data of the job.
            error_log: The absolute path to the .txt file on the processing server, where to store the standard error
                data of the job.
            cpus_to_use: The number of CPUs to use for the job.
            ram_gb: The amount of RAM to allocate for the job in Gigabytes.
            time_limit: The maximum time limit for the job, in minutes. It is highly advised to set an adequate maximum
                runtime limit to prevent jobs from hogging the server for a long period of time.

        Returns:
            The Slurm command object filled with the job configuration and conda shell initialization commands.
        """
    def submit_job(self, slurm_command: Slurm, working_directory: Path) -> str:
        """Submits the input SLURM command to the managed BioHPC server via the shell script.

        This method submits various commands for execution via SLURM-managed BioHPC cluster. As part of its runtime, the
        method translates the Slurm object into the shell script, moves the script to the target working directory on
        the server, and instructs the server to execute the shell script (via SLURM).

        Args:
            slurm_command: The Slurm (command) object containing the job configuration and individual commands to run
                as part of the processing pipeline.
            working_directory: The path to the working directory on the server where the shell script is moved
                and executed.

        Returns:
            The job ID assigned to the job by SLURM manager if the command submission is successful.

        Raises:
            RuntimeError: If the command submission to the server fails.
        """
    def job_complete(self, job_id: str) -> bool:
        """Returns True if the job with the given ID has been completed or terminated its runtime due to an error.

        If the job is still running or is waiting inside the execution queue, returns False.

        Args:
            job_id: The numeric ID of the job to check, assigned by SLURM.
        """
    def close(self) -> None:
        """Closes the SSH connection to the server.

        This method has to be called before destroying the class instance to ensure proper resource cleanup.
        """