sl-shared-assets 1.0.0rc8__py3-none-any.whl → 1.0.0rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of sl-shared-assets might be problematic.

@@ -1,52 +0,0 @@
- from pathlib import Path
-
- def _calculate_file_checksum(base_directory: Path, file_path: Path) -> tuple[str, bytes]:
-     """Calculates xxHash3-128 checksum for a single file and its path relative to the base directory.
-
-     This function is passed to parallel workers used by the calculate_directory_checksum() function that iteratively
-     calculates the checksum for all files inside a directory. Each call to this function returns the checksum for the
-     target file, which includes both the contents of the file and its path relative to the base directory.
-
-     Args:
-         base_directory: The path to the base (root) directory which is being checksummed by the main
-             'calculate_directory_checksum' function.
-         file_path: The absolute path to the target file.
-
-     Returns:
-         A tuple with two elements. The first element is the path to the file relative to the base directory. The second
-         element is the xxHash3-128 checksum that covers the relative path and the contents of the file.
-     """
-
- def calculate_directory_checksum(
-     directory: Path, num_processes: int | None = None, batch: bool = False, save_checksum: bool = True
- ) -> str:
-     """Calculates xxHash3-128 checksum for the input directory, which includes the data of all contained files and
-     the directory structure information.
-
-     This function is used to generate a checksum for the raw_data directory of each experiment or training session.
-     Checksums are used to verify the session data integrity during transmission between the PC that acquired the data
-     and long-term storage locations, such as the Synology NAS or the BioHPC server. The function can be configured to
-     write the generated checksum as a hexadecimal string to the ax_checksum.txt file stored at the highest level of the
-     input directory.
-
-     Note:
-         This function uses multiprocessing to efficiently parallelize checksum calculation for multiple files. In
-         combination with xxHash3, this achieves a significant speedup over more common checksums, such as MD5 and
-         SHA256. Note that xxHash3 is not suitable for security purposes and is only used to ensure data integrity.
-
-         The function notifies the user about the checksum calculation progress via the terminal.
-
-         The returned checksum accounts for both the contents of each file and the layout of the input directory
-         structure.
-
-     Args:
-         directory: The Path to the directory to be checksummed.
-         num_processes: The number of CPU processes to use for parallelizing checksum calculation. If set to None, the
-             function defaults to using (logical CPU count - 4).
-         batch: Determines whether the function is called as part of batch-processing multiple directories. This is used
-             to optimize progress reporting to avoid cluttering the terminal.
-         save_checksum: Determines whether the checksum should be saved (written) to a .txt file.
-
-     Returns:
-         The xxHash3-128 checksum for the input directory as a hexadecimal string.
-     """
@@ -1,112 +0,0 @@
- from pathlib import Path
- from dataclasses import dataclass
-
- from simple_slurm import Slurm
- from paramiko.client import SSHClient as SSHClient
- from ataraxis_data_structures import YamlConfig
-
- def generate_server_credentials(
-     output_directory: Path, username: str, password: str, host: str = "cbsuwsun.biohpc.cornell.edu"
- ) -> None:
-     """Generates a new server_credentials.yaml file under the specified directory, using the input information.
-
-     This function provides a convenience interface for generating new BioHPC server credential files. Generally, this is
-     only used when setting up new host-computers in the lab.
-     """
- @dataclass()
- class ServerCredentials(YamlConfig):
-     """This class stores the hostname and credentials used to log into the BioHPC cluster to run Sun lab processing
-     pipelines.
-
-     Primarily, this is used as part of the sl-experiment library runtime to start data processing once it is
-     transferred to the BioHPC server during preprocessing.
-     """
-
-     username: str = ...
-     password: str = ...
-     host: str = ...
-
- class Server:
-     """Encapsulates access to the Sun lab BioHPC processing server.
-
-     This class provides the API that allows accessing the BioHPC server and creating and submitting various
-     SLURM-managed jobs to the server. It functions as the central interface used by all processing pipelines in the
-     lab to execute costly data processing on the server.
-
-     Notes:
-         All lab processing pipelines expect the data to be stored on the server and all processing logic to be packaged
-         and installed into dedicated conda environments on the server.
-
-     Args:
-         credentials_path: The path to the .yaml file containing the server hostname and access credentials.
-
-     Attributes:
-         _open: Tracks whether the connection to the server is open or not.
-         _client: Stores the initialized SSHClient instance used to interface with the server.
-     """
-
-     _open: bool
-     _credentials: ServerCredentials
-     _client: SSHClient
-     def __init__(self, credentials_path: Path) -> None: ...
-     def __del__(self) -> None:
-         """If the instance is connected to the server, terminates the connection before the instance is destroyed."""
-     @staticmethod
-     def generate_slurm_header(
-         job_name: str, output_log: Path, error_log: Path, cpus_to_use: int = 20, ram_gb: int = 4, time_limit: int = 60
-     ) -> Slurm:
-         """Creates a SLURM command object and fills it with initial job configuration data.
-
-         This method is used to generate the initial SLURM command object and fill it with job (SLURM) configuration and
-         general conda initialization data. It is used by all processing pipelines in the lab as the initial
-         configuration point when writing job shell scripts.
-
-         Notes:
-             The command header generated by this method does not contain the command to initialize the specific conda
-             environment to be used during processing. This has to be provided as part of the additional command
-             configuration, typically by adding the "source activate {ENV_NAME}" subcommand to the end of the header
-             returned by this method.
-
-         Args:
-             job_name: The descriptive name of the SLURM job to be created.
-             output_log: The path to the .txt file on the processing server, where to store the standard output of the
-                 job.
-             error_log: The path to the .txt file on the processing server, where to store the standard error of the
-                 job.
-             cpus_to_use: The number of CPUs to use for the job.
-             ram_gb: The amount of RAM to allocate for the job in Gigabytes.
-             time_limit: The maximum time limit for the job, in minutes. It is highly advised to set an adequate maximum
-                 runtime limit to prevent jobs from hogging the server for a long period of time.
-         """
-     def submit_job(self, slurm_command: Slurm, working_directory: Path) -> str:
-         """Submits the input SLURM command to the managed BioHPC server via a shell script.
-
-         This method submits various commands for execution via the SLURM-managed BioHPC cluster. As part of its runtime, the
-         method translates the Slurm object into a shell script, moves the script to the target working directory on
-         the server, and instructs the server to execute the shell script (via SLURM).
-
-         Args:
-             slurm_command: The Slurm (command) object containing the job configuration and individual commands to run
-                 as part of the processing pipeline.
-             working_directory: The path to the working directory on the server where the shell script is moved
-                 and executed.
-
-         Returns:
-             The job ID assigned to the job by the SLURM manager if the command submission is successful.
-
-         Raises:
-             RuntimeError: If the command submission to the server fails.
-         """
-     def job_complete(self, job_id: str) -> bool:
-         """Returns True if the job with the given ID has been completed or terminated its runtime due to an error.
-
-         If the job is still running or is waiting inside the execution queue, returns False.
-
-         Args:
-             job_id: The numeric ID of the job to check, assigned by SLURM.
-         """
-     def close(self) -> None:
-         """Closes the SSH connection to the server.
-
-         This method has to be called before destroying the class instance to ensure proper resource cleanup.
-         """
@@ -1,188 +0,0 @@
- from typing import Any
- from dataclasses import field, dataclass
-
- from _typeshed import Incomplete
- from ataraxis_data_structures import YamlConfig
-
- @dataclass
- class Main:
-     """Stores global settings used to broadly define the suite2p processing configuration."""
-
-     nplanes: int = ...
-     nchannels: int = ...
-     functional_chan: int = ...
-     tau: float = ...
-     force_sktiff: bool = ...
-     fs: float = ...
-     do_bidiphase: bool = ...
-     bidiphase: int = ...
-     bidi_corrected: bool = ...
-     frames_include: int = ...
-     multiplane_parallel: bool = ...
-     ignore_flyback: list[int] = field(default_factory=list)
-
- @dataclass
- class FileIO:
-     """Stores I/O settings used to specify input data file locations, formats, and output storage options."""
-
-     fast_disk: list[str] = field(default_factory=list)
-     delete_bin: bool = ...
-     mesoscan: bool = ...
-     bruker: bool = ...
-     bruker_bidirectional: bool = ...
-     h5py: list[str] = field(default_factory=list)
-     h5py_key: str = ...
-     nwb_file: str = ...
-     nwb_driver: str = ...
-     nwb_series: str = ...
-     save_path0: list[str] = field(default_factory=list)
-     save_folder: list[str] = field(default_factory=list)
-     look_one_level_down: bool = ...
-     subfolders: list[str] = field(default_factory=list)
-     move_bin: bool = ...
-
- @dataclass
- class Output:
-     """Stores I/O settings used to define the output format and organization of the processing results."""
-
-     preclassify: float = ...
-     save_nwb: bool = ...
-     save_mat: bool = ...
-     combined: bool = ...
-     aspect: float = ...
-     report_time: bool = ...
-
- @dataclass
- class Registration:
-     """Stores rigid registration settings used for correcting motion artifacts between frames."""
-
-     do_registration: bool = ...
-     align_by_chan: int = ...
-     nimg_init: int = ...
-     batch_size: int = ...
-     maxregshift: float = ...
-     smooth_sigma: float = ...
-     smooth_sigma_time: float = ...
-     keep_movie_raw: bool = ...
-     two_step_registration: bool = ...
-     reg_tif: bool = ...
-     reg_tif_chan2: bool = ...
-     subpixel: int = ...
-     th_badframes: float = ...
-     norm_frames: bool = ...
-     force_refImg: bool = ...
-     pad_fft: bool = ...
-
- @dataclass
- class OnePRegistration:
-     """Stores additional pre-registration processing settings used to improve the registration of 1-photon datasets."""
-
-     one_p_reg: bool = ...
-     spatial_hp_reg: int = ...
-     pre_smooth: float = ...
-     spatial_taper: float = ...
-
- @dataclass
- class NonRigid:
-     """Stores non-rigid registration settings used to improve motion registration in complex datasets."""
-
-     nonrigid: bool = ...
-     block_size: list[int] = field(default_factory=Incomplete)
-     snr_thresh: float = ...
-     maxregshiftNR: float = ...
-
- @dataclass
- class ROIDetection:
-     """Stores ROI detection and extraction settings used to identify cells and their activity signals."""
-
-     roidetect: bool = ...
-     sparse_mode: bool = ...
-     spatial_scale: int = ...
-     connected: bool = ...
-     threshold_scaling: float = ...
-     spatial_hp_detect: int = ...
-     max_overlap: float = ...
-     high_pass: int = ...
-     smooth_masks: bool = ...
-     max_iterations: int = ...
-     nbinned: int = ...
-     denoise: bool = ...
-
- @dataclass
- class CellposeDetection:
-     """Stores Cellpose algorithm settings used for cell detection."""
-
-     anatomical_only: int = ...
-     diameter: int = ...
-     cellprob_threshold: float = ...
-     flow_threshold: float = ...
-     spatial_hp_cp: int = ...
-     pretrained_model: str = ...
-
- @dataclass
- class SignalExtraction:
-     """Stores settings used to extract fluorescence signals from ROIs and surrounding neuropil regions."""
-
-     neuropil_extract: bool = ...
-     allow_overlap: bool = ...
-     min_neuropil_pixels: int = ...
-     inner_neuropil_radius: int = ...
-     lam_percentile: int = ...
-
- @dataclass
- class SpikeDeconvolution:
-     """Stores settings used to deconvolve calcium signals to infer spike trains."""
-
-     spikedetect: bool = ...
-     neucoeff: float = ...
-     baseline: str = ...
-     win_baseline: float = ...
-     sig_baseline: float = ...
-     prctile_baseline: float = ...
-
- @dataclass
- class Classification:
-     """Stores settings used to classify detected ROIs as real cells or artifacts."""
-
-     soma_crop: bool = ...
-     use_builtin_classifier: bool = ...
-     classifier_path: str = ...
-
- @dataclass
- class Channel2:
-     """Stores settings for processing the second channel in multichannel datasets."""
-
-     chan2_thres: float = ...
-
- @dataclass
- class Suite2PConfiguration(YamlConfig):
-     """Stores the user-addressable suite2p configuration parameters, organized into subsections.
-
-     This class is used during processing to instruct suite2p on how to process the data. Specifically, it provides a
-     user-friendly way of specifying all user-addressable parameters through a .YAML file. The sl-forgery library then
-     loads the data from the .yaml file and uses it to configure the single-day suite2p pipeline and the multiday suite2p
-     pipeline.
-
-     Notes:
-         The .YAML file uses section names that match the suite2p documentation sections. This way, users can always
-         consult the suite2p documentation for information on the purpose of each field inside every subsection.
-     """
-
-     main: Main = field(default_factory=Main)
-     file_io: FileIO = field(default_factory=FileIO)
-     output: Output = field(default_factory=Output)
-     registration: Registration = field(default_factory=Registration)
-     one_p_registration: OnePRegistration = field(default_factory=OnePRegistration)
-     non_rigid: NonRigid = field(default_factory=NonRigid)
-     roi_detection: ROIDetection = field(default_factory=ROIDetection)
-     cellpose_detection: CellposeDetection = field(default_factory=CellposeDetection)
-     signal_extraction: SignalExtraction = field(default_factory=SignalExtraction)
-     spike_deconvolution: SpikeDeconvolution = field(default_factory=SpikeDeconvolution)
-     classification: Classification = field(default_factory=Classification)
-     channel2: Channel2 = field(default_factory=Channel2)
-     def to_ops(self) -> dict[str, Any]:
-         """Converts the class instance to a dictionary and returns it to the caller.
-
-         This dictionary can be passed to suite2p functions either as an 'ops' or 'db' argument to control the
-         processing runtime.
-         """
@@ -1,53 +0,0 @@
- from pathlib import Path
-
- from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
-
- def _transfer_file(source_file: Path, source_directory: Path, destination_directory: Path) -> None:
-     """Copies the input file from the source directory to the destination directory while preserving the file metadata.
-
-     This is a worker function used by the transfer_directory() function to move multiple files in parallel.
-
-     Notes:
-         If the file is found under a hierarchy of subdirectories inside the input source_directory, that hierarchy will
-         be preserved in the destination directory.
-
-     Args:
-         source_file: The file to be copied.
-         source_directory: The root directory where the file is located.
-         destination_directory: The destination directory where to move the file.
-     """
-
- def transfer_directory(source: Path, destination: Path, num_threads: int = 1, verify_integrity: bool = True) -> None:
-     """Copies the contents of the input directory tree from source to destination while preserving the folder
-     structure.
-
-     This function is used to assemble the experimental data from all remote machines used in the acquisition process on
-     the VRPC before the data is preprocessed. It is also used to transfer the preprocessed data from the VRPC to the
-     SynologyNAS and the Sun lab BioHPC server.
-
-     Notes:
-         This function recreates the moved directory hierarchy on the destination if the hierarchy does not exist. This is
-         done before copying the files.
-
-         The function executes a multithreaded copy operation. It does not clean up the source files. That job is handed
-         to the specific preprocessing function from the sl_experiment or sl-forgery libraries that calls this function.
-
-         If the function is configured to verify transferred file integrity, it reruns the xxHash3-128 checksum calculation
-         and compares the returned checksum to the one stored in the source directory. The function assumes that all input
-         directories contain the 'ax_checksum.txt' file that stores the 'source' directory checksum at the highest level
-         of the input directory tree.
-
-     Args:
-         source: The path to the directory that needs to be moved.
-         destination: The path to the destination directory where to move the contents of the source directory.
-         num_threads: The number of threads to use for parallel file transfer. This number should be set depending on the
-             type of transfer (local or remote) and is not guaranteed to provide improved transfer performance. For local
-             transfers, setting this number above 1 will likely provide a performance boost. For remote transfers using
-             a single TCP / IP socket (such as non-multichannel SMB protocol), the number should be set to 1.
-         verify_integrity: Determines whether to perform integrity verification for the transferred files. Note,
-             integrity verification is a time-consuming process and generally would not be a concern for most runtimes.
-             Therefore, it is often fine to disable this option to optimize runtime speed.
-
-     Raises:
-         RuntimeError: If the transferred files do not pass the xxHash3-128 checksum integrity verification.
-     """
@@ -1,20 +0,0 @@
- sl_shared_assets/__init__.py,sha256=V7EvTTSB_GhetCbyYPg2RoiG1etDVeML5EBWgGvUo7E,2227
- sl_shared_assets/__init__.pyi,sha256=U5Sma4zenITe0-eI_heTUYh_9P0puhgM3hAdcf-qozk,2532
- sl_shared_assets/cli.py,sha256=CjfuXXj7CeDA2pbCwe5Rad6RDjIqGDud14IUDMdzx_w,2639
- sl_shared_assets/cli.pyi,sha256=X5UdXpkzUw71_ftaIXMsnttIeR15SPVLiECuPge_zw8,1032
- sl_shared_assets/data_classes.py,sha256=Gilip0hQJqNS7SJSDK0crXy4eiD6Ws_OO5cBGInqaGw,87076
- sl_shared_assets/data_classes.pyi,sha256=5l2snWnlqTXGiJldGLvI4sc2G2_08aw9bFIOi_RmgjE,32240
- sl_shared_assets/packaging_tools.py,sha256=3kAXFK37Lv4JA1YhjcoBz1x2Ell8ObCqe9pwxAts4m4,6709
- sl_shared_assets/packaging_tools.pyi,sha256=hlAP9AxF7NHtFIPKjj5ehm8Vr9qIn6xDk4VvL0JuAmk,3055
- sl_shared_assets/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- sl_shared_assets/server.py,sha256=VtFwS4PEy24n_pGz9W56zufkZEf_PKxIllP2ZnF5Zgc,13269
- sl_shared_assets/server.pyi,sha256=Qa-pziy3fAoQKYJn1H4Bh_7oKgJu5ZmWBMogHQLUKZs,5550
- sl_shared_assets/suite2p.py,sha256=sQ5Zj0TJFD-gUHqtWnRvapBpr8QgmaiVil123cWxGxc,20511
- sl_shared_assets/suite2p.pyi,sha256=Uyv8ov--etwJIc6e2UVgs0jYXwnK2CD-kICfXo5KpcI,6331
- sl_shared_assets/transfer_tools.py,sha256=J26kwOp_NpPSY0-xu5FTw9udte-rm_mW1FJyaTNoqQI,6606
- sl_shared_assets/transfer_tools.pyi,sha256=FoH7eYZe7guGHfPr0MK5ggO62uXKwD2aJ7h1Bu7PaEE,3294
- sl_shared_assets-1.0.0rc8.dist-info/METADATA,sha256=6Omp-WLRWXVniIkMLG6SyujEEiJuptEk-2f1SQgA4m0,47806
- sl_shared_assets-1.0.0rc8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- sl_shared_assets-1.0.0rc8.dist-info/entry_points.txt,sha256=3VPr5RkWBkusNN9OhWXtC-DN0utu7uMrUulazIK2VNA,166
- sl_shared_assets-1.0.0rc8.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
- sl_shared_assets-1.0.0rc8.dist-info/RECORD,,