sl-shared-assets 4.0.1-py3-none-any.whl → 5.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sl_shared_assets/__init__.py +48 -41
- sl_shared_assets/command_line_interfaces/__init__.py +3 -0
- sl_shared_assets/command_line_interfaces/configure.py +173 -0
- sl_shared_assets/command_line_interfaces/manage.py +226 -0
- sl_shared_assets/data_classes/__init__.py +33 -32
- sl_shared_assets/data_classes/configuration_data.py +267 -79
- sl_shared_assets/data_classes/session_data.py +226 -289
- sl_shared_assets/server/__init__.py +24 -4
- sl_shared_assets/server/job.py +6 -7
- sl_shared_assets/server/pipeline.py +585 -0
- sl_shared_assets/server/server.py +57 -25
- sl_shared_assets/tools/__init__.py +9 -8
- sl_shared_assets/tools/packaging_tools.py +14 -25
- sl_shared_assets/tools/project_management_tools.py +602 -523
- sl_shared_assets/tools/transfer_tools.py +88 -23
- {sl_shared_assets-4.0.1.dist-info → sl_shared_assets-5.0.1.dist-info}/METADATA +46 -203
- sl_shared_assets-5.0.1.dist-info/RECORD +23 -0
- sl_shared_assets-5.0.1.dist-info/entry_points.txt +3 -0
- sl_shared_assets/__init__.pyi +0 -91
- sl_shared_assets/cli.py +0 -501
- sl_shared_assets/cli.pyi +0 -106
- sl_shared_assets/data_classes/__init__.pyi +0 -75
- sl_shared_assets/data_classes/configuration_data.pyi +0 -235
- sl_shared_assets/data_classes/runtime_data.pyi +0 -157
- sl_shared_assets/data_classes/session_data.pyi +0 -379
- sl_shared_assets/data_classes/surgery_data.pyi +0 -89
- sl_shared_assets/server/__init__.pyi +0 -11
- sl_shared_assets/server/job.pyi +0 -205
- sl_shared_assets/server/server.pyi +0 -298
- sl_shared_assets/tools/__init__.pyi +0 -19
- sl_shared_assets/tools/ascension_tools.py +0 -265
- sl_shared_assets/tools/ascension_tools.pyi +0 -68
- sl_shared_assets/tools/packaging_tools.pyi +0 -58
- sl_shared_assets/tools/project_management_tools.pyi +0 -239
- sl_shared_assets/tools/transfer_tools.pyi +0 -53
- sl_shared_assets-4.0.1.dist-info/RECORD +0 -36
- sl_shared_assets-4.0.1.dist-info/entry_points.txt +0 -7
- {sl_shared_assets-4.0.1.dist-info → sl_shared_assets-5.0.1.dist-info}/WHEEL +0 -0
- {sl_shared_assets-4.0.1.dist-info → sl_shared_assets-5.0.1.dist-info}/licenses/LICENSE +0 -0
sl_shared_assets/server/server.py

@@ -1,4 +1,4 @@
-"""This module provides the tools for working with
+"""This module provides the tools for working with remote compute servers. Specifically, the classes from this
 module establish an API for submitting jobs to the shared data processing cluster (managed via SLURM) and monitoring
 the running job status. All lab processing and analysis pipelines use this interface for accessing shared compute
 resources.
@@ -27,20 +27,22 @@ def generate_server_credentials(
     output_directory: Path,
     username: str,
     password: str,
+    service: bool = False,
     host: str = "cbsuwsun.biopic.cornell.edu",
     storage_root: str = "/local/workdir",
     working_root: str = "/local/storage",
     shared_directory_name: str = "sun_data",
 ) -> None:
-    """Generates a new
+    """Generates a new server access credentials .yaml file under the specified directory, using input information.

-    This function provides a convenience interface for generating new
-
+    This function provides a convenience interface for generating new server access credential files. Depending on
+    configuration, it either creates user access credentials files or service access credentials files.

     Args:
         output_directory: The directory where to save the generated server_credentials.yaml file.
         username: The username to use for server authentication.
         password: The password to use for server authentication.
+        service: Determines whether the generated credentials file stores the data for a user or a service account.
         host: The hostname or IP address of the server to connect to.
         storage_root: The path to the root storage (slow) server directory. Typically, this is the path to the
             top-level (root) directory of the HDD RAID volume.
@@ -50,15 +52,26 @@ def generate_server_credentials(
         shared_directory_name: The name of the shared directory used to store all Sun lab project data on the storage
             and working server volumes.
     """
-
-
-
-
-
-
-
-
-
+    if service:
+        ServerCredentials(
+            username=username,
+            password=password,
+            host=host,
+            storage_root=storage_root,
+            working_root=working_root,
+            shared_directory_name=shared_directory_name,
+        ).to_yaml(file_path=output_directory.joinpath("service_credentials.yaml"))
+        console.echo(message="Service server access credentials file: Created.", level=LogLevel.SUCCESS)
+    else:
+        ServerCredentials(
+            username=username,
+            password=password,
+            host=host,
+            storage_root=storage_root,
+            working_root=working_root,
+            shared_directory_name=shared_directory_name,
+        ).to_yaml(file_path=output_directory.joinpath("user_credentials.yaml"))
+        console.echo(message="User server access credentials file: Created.", level=LogLevel.SUCCESS)


 @dataclass()
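For context, a minimal usage sketch of the updated helper; the import path and all argument values are assumptions, while the function name and the service flag come from the diff above:

from pathlib import Path

# Import path is an assumption; the signature is taken from the diff above.
from sl_shared_assets import generate_server_credentials

# With service=False (the new default), this writes user_credentials.yaml;
# with service=True, it would write service_credentials.yaml instead.
generate_server_credentials(
    output_directory=Path.home().joinpath(".sl_credentials"),
    username="example_user",
    password="example_password",
    service=False,
)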
@@ -111,11 +124,11 @@ class ServerCredentials(YamlConfig):


 class Server:
-    """Encapsulates access to
+    """Encapsulates access to a Sun lab processing server.

-    This class provides the API that allows accessing the
-    to the server. It functions as the central interface used by all processing pipelines in the
-    data processing on the server.
+    This class provides the API that allows accessing the remote processing server to create and submit various
+    SLURM-managed jobs to the server. It functions as the central interface used by all processing pipelines in the
+    lab to execute costly data processing on the server.

     Notes:
         All lab processing pipelines expect the data to be stored on the server and all processing logic to be packaged
@@ -306,7 +319,7 @@ class Server:
         # include connection data received from the server.
         return self.submit_job(job)  # type: ignore[return-value]

-    def submit_job(self, job: Job | JupyterJob) -> Job | JupyterJob:
+    def submit_job(self, job: Job | JupyterJob, verbose: bool = True) -> Job | JupyterJob:
         """Submits the input job to the managed BioHPC server via SLURM job manager.

         This method submits various jobs for execution via the SLURM-managed BioHPC cluster. As part of its runtime, the
@@ -315,6 +328,9 @@ class Server:

         Args:
             job: The Job object that contains all job data.
+            verbose: Determines whether to notify the user about non-error states of the job submission task. Typically,
+                this is disabled when batch-submitting jobs (for example, as part of running a processing pipeline) and
+                enabled when submitting single jobs.

         Returns:
             The job object whose 'job_id' attribute had been modified with the job ID if the job was successfully
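To illustrate the new verbose flag, a hedged batch-submission sketch; the server and jobs arguments are hypothetical stand-ins, and only the submit_job signature is taken from the diff:

def submit_batch(server, jobs) -> None:
    # Only submit_job(job, verbose=...) is from the diff; 'server' and 'jobs'
    # are hypothetical stand-ins for a connected Server and a list of Jobs.
    for job in jobs:
        server.submit_job(job, verbose=False)  # suppresses per-job messages
    print(f"Submitted {len(jobs)} jobs.")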
@@ -323,7 +339,8 @@ class Server:
         Raises:
             RuntimeError: If job submission to the server fails.
         """
-
+        if verbose:
+            console.echo(message=f"Submitting '{job.job_name}' job to the remote server {self.host}...")

         # Generates a temporary shell script on the local machine. Uses tempfile to automatically remove the
         # local script as soon as it is uploaded to the server.
@@ -332,7 +349,7 @@ class Server:
         fixed_script_content = job.command_script

         # Creates a temporary script file locally and dumps translated command data into the file
-        with open(
+        with local_script_path.open("w") as f:
             f.write(fixed_script_content)

         # Uploads the command script to the server
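The surrounding comments describe a tempfile-based workflow; a self-contained sketch of that pattern under assumed names (the script content and file name are illustrative):

import tempfile
from pathlib import Path

# The script file disappears together with the temporary directory once the
# upload step (not shown) completes, mirroring the comments in the diff.
with tempfile.TemporaryDirectory() as tmp_dir:
    local_script_path = Path(tmp_dir).joinpath("job_script.sh")
    with local_script_path.open("w") as f:
        f.write("#!/bin/bash\n#SBATCH --job-name=example\necho 'hello'\n")
    # ... upload local_script_path to the server here ...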
@@ -400,6 +417,9 @@ class Server:

             timer.delay_noblock(delay=5, allow_sleep=True)  # Waits for 5 seconds before checking again
         else:
+            # Aborts the job if the server is busy running other jobs
+            self.abort_job(job=job)
+
             # Only raises the timeout error if the while loop is not broken in 120 seconds
             message = (
                 f"Remote jupyter server job {job.job_name} with id {job.job_id} did not start within 120 seconds "
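The new abort_job call lands in a while/else clause, which runs only when the loop exhausts its budget without breaking; a generic sketch of that poll-then-abort pattern with hypothetical helpers:

import time

def wait_for_start(job_is_running, abort_job, timeout: int = 120) -> None:
    # 'job_is_running' and 'abort_job' are hypothetical callables; the 5-second
    # poll, 120-second budget, and abort-before-raising flow mirror the diff.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if job_is_running():
            break
        time.sleep(5)
    else:
        abort_job()
        raise TimeoutError(f"job did not start within {timeout} seconds")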
@@ -409,7 +429,8 @@ class Server:
             console.error(message, TimeoutError)
             raise TimeoutError(message)  # Fallback to appease mypy

-
+        if verbose:
+            console.echo(message=f"{job.job_name} job: Submitted to {self.host}.", level=LogLevel.SUCCESS)

         # Returns the updated job object
         return job
@@ -603,7 +624,7 @@ class Server:
             sftp.rmdir(str(remote_path))

         except Exception as e:
-            console.echo(f"Unable to remove the specified directory {remote_path}: {
+            console.echo(f"Unable to remove the specified directory {remote_path}: {e!s}", level=LogLevel.WARNING)

     def create_directory(self, remote_path: Path, parents: bool = True) -> None:
         """Creates the specified directory tree on the managed remote server via SFTP.
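The fix completes a previously truncated f-string using the !s conversion, which applies str() to the exception object before formatting; a minimal illustration:

try:
    raise PermissionError("permission denied")
except Exception as e:
    # '{e!s}' formats str(e) into the message.
    print(f"Unable to remove the specified directory: {e!s}")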
@@ -672,13 +693,14 @@ class Server:
             # Checks if the target file or directory exists by trying to 'stat' it
             sftp.stat(str(remote_path))

-            # If the request does not err, returns True (file or directory exists)
-            return True
-
         # If the directory or file does not exist, returns False
         except FileNotFoundError:
             return False

+        else:
+            # If the request does not err, returns True (file or directory exists)
+            return True
+
     def close(self) -> None:
         """Closes the SSH connection to the server.

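Moving return True into an else clause makes it explicit that the success path runs only when stat() does not raise; a local-filesystem stand-in for the same pattern:

from pathlib import Path

def exists(path: Path) -> bool:
    # Local stand-in for the SFTP-based check: stat() either raises or
    # succeeds, and the else branch runs only when no exception occurred.
    try:
        path.stat()
    except FileNotFoundError:
        return False
    else:
        return True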
@@ -723,3 +745,13 @@ class Server:
     def user(self) -> str:
         """Returns the username used to authenticate with the server."""
         return self._credentials.username
+
+    @property
+    def suite2p_configurations_directory(self) -> Path:
+        """Returns the absolute path to the shared directory that stores all sl-suite2p runtime configuration files."""
+        return self.raw_data_root.joinpath("suite2p_configurations")
+
+    @property
+    def dlc_projects_directory(self) -> Path:
+        """Returns the absolute path to the shared directory that stores all DeepLabCut projects."""
+        return self.raw_data_root.joinpath("deeplabcut_projects")
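A short usage sketch for the two new shared-directory properties; the server instance is hypothetical, while the property names and target directories come from the diff above:

from pathlib import Path

def resolve_shared_directories(server) -> tuple[Path, Path]:
    # Resolves to <raw_data_root>/suite2p_configurations and
    # <raw_data_root>/deeplabcut_projects respectively.
    return server.suite2p_configurations_directory, server.dlc_projects_directory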
sl_shared_assets/tools/__init__.py

@@ -1,22 +1,23 @@
 """This package provides helper tools used to automate routine operations, such as transferring or verifying the
 integrity of the data. The tools from this package are used by most other data processing libraries in the lab."""

-from .transfer_tools import transfer_directory
-from .ascension_tools import ascend_tyche_data
+from .transfer_tools import delete_directory, transfer_directory
 from .packaging_tools import calculate_directory_checksum
 from .project_management_tools import (
     ProjectManifest,
-
-
+    archive_session,
+    prepare_session,
+    resolve_checksum,
     generate_project_manifest,
 )

 __all__ = [
     "ProjectManifest",
-    "
+    "archive_session",
     "calculate_directory_checksum",
-    "
-    "verify_session_checksum",
+    "delete_directory",
     "generate_project_manifest",
-    "
+    "prepare_session",
+    "resolve_checksum",
+    "transfer_directory",
 ]
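After this change, the public surface of the tools package matches the rebuilt __all__; a corresponding import sketch (the package path is taken from the file list above):

from sl_shared_assets.tools import (
    ProjectManifest,
    archive_session,
    calculate_directory_checksum,
    delete_directory,
    generate_project_manifest,
    prepare_session,
    resolve_checksum,
    transfer_directory,
)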
sl_shared_assets/tools/packaging_tools.py

@@ -1,5 +1,6 @@
-"""This module provides
-
+"""This module provides tools for packaging data for transmission. Although this module is primarily used when
+transmitting data over the network, it also works for local (within-machine) transfers. The tools from
+this module work in tandem with tools offered by transfer_tools.py to ensure the integrity of the transferred data.
 """

 import os
@@ -10,8 +11,6 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
 from tqdm import tqdm
 import xxhash

-from ..data_classes import TrackerFileNames
-
 # Defines a 'blacklist' set of files. Primarily, this list contains the service files that may change after the session
 # data has been acquired. Therefore, it does not make sense to include them in the checksum, as they do not reflect the
 # data that should remain permanently unchanged. Note, make sure all service files are added to this set!
@@ -19,22 +18,16 @@ _excluded_files = {
     "ax_checksum.txt",
     "ubiquitin.bin",
     "telomere.bin",
-    "p53.bin",
     "nk.bin",
 }

-# Extends the exclusion set to include all tracker .yaml files and their concurrent access .lock files.
-for name in tuple(TrackerFileNames):
-    _excluded_files.add(name)
-    _excluded_files.add(f"{name}.lock")
-

 def _calculate_file_checksum(base_directory: Path, file_path: Path) -> tuple[str, bytes]:
-    """Calculates xxHash3-128 checksum for
+    """Calculates xxHash3-128 checksum for the target file and its path relative to the base directory.

     This function is passed to parallel workers used by the calculate_directory_hash() method that iteratively
     calculates the checksum for all files inside a directory. Each call to this function returns the checksum for the
-    target file, which
+    target file, which reflects both the contents of the file and its path relative to the base directory.

     Args:
         base_directory: The path to the base (root) directory which is being checksummed by the main
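Based on the docstring above, a hedged sketch of what such a per-file checksum looks like with xxHash3-128; this is an illustration of the documented behavior, not the library's exact implementation:

from pathlib import Path

import xxhash

def file_checksum(base_directory: Path, file_path: Path) -> bytes:
    # The digest covers the file's path relative to the base directory as well
    # as its contents, read in 8 MB chunks as described in the diff below.
    checksum = xxhash.xxh3_128()
    checksum.update(str(file_path.relative_to(base_directory)).encode())
    with file_path.open("rb") as f:
        for chunk in iter(lambda: f.read(1024 * 1024 * 8), b""):
            checksum.update(chunk)
    return checksum.digest()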
@@ -55,7 +48,7 @@ def _calculate_file_checksum(base_directory: Path, file_path: Path) -> tuple[str

     # Extends the checksum to reflect the file data state. Uses 8 MB chunks to avoid excessive RAM hogging at the cost
     # of slightly reduced throughput.
-    with open(
+    with file_path.open("rb") as f:
         for chunk in iter(lambda: f.read(1024 * 1024 * 8), b""):
             checksum.update(chunk)

@@ -71,18 +64,14 @@ def calculate_directory_checksum(
     """Calculates xxHash3-128 checksum for the input directory, which includes the data of all contained files and
     the directory structure information.

-
-
-
-    write the generated checksum as a hexadecimal string to the ax_checksum.txt file stored at the highest level of the
-    input directory.
+    Checksums are used to verify the data integrity during transmission within machines (from one storage volume to
+    another) and between machines. The function can be configured to write the generated checksum as a hexadecimal
+    string to the ax_checksum.txt file stored at the highest level of the input directory.

     Note:
-        This
-        combination with xxHash3, this achieves a significant speedup over
-        SHA256. Note that xxHash3 is not suitable for security purposes and is only used to ensure data integrity.
-
-        The method notifies the user about the checksum calculation process via the terminal.
+        This function uses multiprocessing to efficiently parallelize checksum calculation for multiple files. In
+        combination with xxHash3, this achieves a significant speedup over other common checksum options, such as MD5
+        and SHA256. Note that xxHash3 is not suitable for security purposes and is only used to ensure data integrity.

         The returned checksum accounts for both the contents of each file and the layout of the input directory
         structure.
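A usage sketch for the directory-level function; the import path and the example path are assumptions, while the function name comes from the diff:

from pathlib import Path

# Import path is an assumption; the function name is from the diff.
from sl_shared_assets.tools import calculate_directory_checksum

session_directory = Path("/local/workdir/sun_data/example_project/session_001")
checksum_hex = calculate_directory_checksum(session_directory)
print(f"Directory checksum: {checksum_hex}")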
@@ -145,8 +134,8 @@ def calculate_directory_checksum(

     # Writes the hash to ax_checksum.txt in the root directory
     if save_checksum:
-        checksum_path = directory
-        with open(
+        checksum_path = directory.joinpath("ax_checksum.txt")
+        with checksum_path.open("w") as f:
             f.write(checksum_hexstr)

     return checksum_hexstr