sl-shared-assets 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sl-shared-assets might be problematic. Click here for more details.
- sl_shared_assets/__init__.py +96 -0
- sl_shared_assets/__init__.pyi +87 -0
- sl_shared_assets/cli.py +72 -0
- sl_shared_assets/cli.pyi +17 -0
- sl_shared_assets/data_classes.py +1435 -0
- sl_shared_assets/data_classes.pyi +646 -0
- sl_shared_assets/packaging_tools.py +133 -0
- sl_shared_assets/packaging_tools.pyi +52 -0
- sl_shared_assets/py.typed +0 -0
- sl_shared_assets/server.py +293 -0
- sl_shared_assets/server.pyi +112 -0
- sl_shared_assets/suite2p.py +449 -0
- sl_shared_assets/suite2p.pyi +188 -0
- sl_shared_assets/transfer_tools.py +119 -0
- sl_shared_assets/transfer_tools.pyi +53 -0
- sl_shared_assets-1.0.0rc1.dist-info/METADATA +849 -0
- sl_shared_assets-1.0.0rc1.dist-info/RECORD +20 -0
- sl_shared_assets-1.0.0rc1.dist-info/WHEEL +4 -0
- sl_shared_assets-1.0.0rc1.dist-info/entry_points.txt +3 -0
- sl_shared_assets-1.0.0rc1.dist-info/licenses/LICENSE +674 -0
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""This module provides methods for moving session runtime data between the local machine, the ScanImage (Mesoscope) PC,
|
|
2
|
+
the Synology NAS drive, and the lab BioHPC server. All methods in this module expect that the destinations and sources
|
|
3
|
+
are mounted on the host file-system via the SMB or an equivalent protocol.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import shutil
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
9
|
+
|
|
10
|
+
from tqdm import tqdm
|
|
11
|
+
from ataraxis_base_utilities import console, ensure_directory_exists
|
|
12
|
+
|
|
13
|
+
from .packaging_tools import calculate_directory_checksum
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _transfer_file(source_file: Path, source_directory: Path, destination_directory: Path) -> None:
|
|
17
|
+
"""Copies the input file from the source directory to the destination directory while preserving the file metadata.
|
|
18
|
+
|
|
19
|
+
This is a worker method used by the transfer_directory() method to move multiple files in parallel.
|
|
20
|
+
|
|
21
|
+
Notes:
|
|
22
|
+
If the file is found under a hierarchy of subdirectories inside the input source_directory, that hierarchy will
|
|
23
|
+
be preserved in the destination directory.
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
source_file: The file to be copied.
|
|
27
|
+
source_directory: The root directory where the file is located.
|
|
28
|
+
destination_directory: The destination directory where to move the file.
|
|
29
|
+
"""
|
|
30
|
+
relative = source_file.relative_to(source_directory)
|
|
31
|
+
dest_file = destination_directory / relative
|
|
32
|
+
shutil.copy2(source_file, dest_file)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def transfer_directory(source: Path, destination: Path, num_threads: int = 1, verify_integrity: bool = True) -> None:
    """Copies the contents of the input directory tree from source to destination while preserving the folder
    structure.

    This function is used to assemble the experimental data from all remote machines used in the acquisition process on
    the VRPC before the data is preprocessed. It is also used to transfer the preprocessed data from the VRPC to the
    SynologyNAS and the Sun lab BioHPC server.

    Notes:
        The subdirectory hierarchy of the source is recreated under the destination before any file is copied.

        Files are copied either sequentially or through a thread pool, depending on num_threads. The source files are
        never removed by this function; cleanup is left to the calling preprocessing function from the sl_experiment
        or sl-forgery libraries.

        When integrity verification is enabled, the xxHash3-128 checksum of the destination directory is recalculated
        and compared against the first line of the 'ax_checksum.txt' file stored at the top level of the source
        directory. The function assumes that this file exists.

    Args:
        source: The path to the directory that needs to be moved.
        destination: The path to the destination directory where to move the contents of the source directory.
        num_threads: The number of threads to use for parallel file transfer. Values above 1 enable the thread pool;
            any other value results in a sequential copy. For remote transfers using a single TCP / IP socket (such as
            non-multichannel SMB protocol), the number should be set to 1.
        verify_integrity: Determines whether to perform integrity verification for the transferred files. Note,
            integrity verification is a time-consuming process, so it can be disabled to optimize runtime speed.

    Raises:
        FileNotFoundError: If the source path does not exist (reported through console.error()).
        RuntimeError: If the transferred files do not pass the xxHash3-128 checksum integrity verification (reported
            through console.error()).
    """
    if not source.exists():
        message = f"Unable to move the directory {source}, as it does not exist."
        console.error(message=message, error=FileNotFoundError)

    # Ensures the destination root directory exists.
    ensure_directory_exists(destination)

    def _depth(entry: Path) -> int:
        """Returns the number of path components separating the entry from the source root."""
        return len(entry.relative_to(source).parts)

    # Walks every item under the source root, shallowest items first. Directories are mirrored on the destination
    # immediately; everything else is queued to be copied afterward.
    pending_files = []
    for entry in sorted(source.rglob("*"), key=_depth):
        if not entry.is_dir():
            pending_files.append(entry)
            continue
        mirrored_directory = destination / entry.relative_to(source)
        mirrored_directory.mkdir(parents=True, exist_ok=True)

    # The progress bar label is the same for both copy modes, so it is built once.
    progress_label = f"Transferring files to {Path(*destination.parts[-6:])}"

    # Copies the data to the destination. Threads are sufficient here, since I/O operations do not hold the GIL.
    if num_threads > 1:
        with ThreadPoolExecutor(max_workers=num_threads) as pool:
            jobs = [pool.submit(_transfer_file, path, source, destination) for path in pending_files]
            progress = tqdm(
                as_completed(jobs),
                total=len(pending_files),
                desc=progress_label,
                unit="file",
            )
            for finished_job in progress:
                # Propagates any exceptions from the file transfer.
                finished_job.result()
    else:
        for path in tqdm(pending_files, desc=progress_label, unit="file"):
            _transfer_file(path, source, destination)

    # Nothing else to do if the caller opted out of integrity verification.
    if not verify_integrity:
        return

    # Verifies the integrity of the transferred directory by rerunning xxHash3-128 calculation.
    destination_checksum = calculate_directory_checksum(directory=destination, batch=False, save_checksum=False)
    with open(file=source.joinpath("ax_checksum.txt"), mode="r") as checksum_file:
        message = (
            f"Checksum mismatch detected when transferring {Path(*source.parts[-6:])} to "
            f"{Path(*destination.parts[-6:])}! The data was likely corrupted in transmission. User intervention "
            f"required."
        )
        stored_checksum = checksum_file.readline().strip()
        if destination_checksum == stored_checksum:
            return
        console.error(message=message, error=RuntimeError)
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
|
|
4
|
+
|
|
5
|
+
def _transfer_file(source_file: Path, source_directory: Path, destination_directory: Path) -> None:
    """Copies the input file from the source directory to the destination directory while preserving the file metadata.

    This is a worker method used by the transfer_directory() method to move multiple files in parallel.

    Notes:
        If the file is found under a hierarchy of subdirectories inside the input source_directory, that hierarchy will
        be preserved in the destination directory.

        This is a signature-only declaration; it carries no implementation body.

    Args:
        source_file: The file to be copied.
        source_directory: The root directory where the file is located.
        destination_directory: The destination directory where to move the file.
    """
|
|
19
|
+
|
|
20
|
+
def transfer_directory(source: Path, destination: Path, num_threads: int = 1, verify_integrity: bool = True) -> None:
    """Copies the contents of the input directory tree from source to destination while preserving the folder
    structure.

    This function is used to assemble the experimental data from all remote machines used in the acquisition process on
    the VRPC before the data is preprocessed. It is also used to transfer the preprocessed data from the VRPC to the
    SynologyNAS and the Sun lab BioHPC server.

    Notes:
        This method recreates the moved directory hierarchy on the destination if the hierarchy does not exist. This is
        done before copying the files.

        The method executes a multithreading copy operation. It does not clean up the source files. That job is handed
        to the specific preprocessing function from the sl_experiment or sl-forgery libraries that calls this function.

        If the method is configured to verify transferred file integrity, it reruns the xxHash3-128 checksum calculation
        and compares the returned checksum to the one stored in the source directory. The method assumes that all input
        directories contain the 'ax_checksum.txt' file that stores the 'source' directory checksum at the highest level
        of the input directory tree.

        This is a signature-only declaration; it carries no implementation body.

    Args:
        source: The path to the directory that needs to be moved.
        destination: The path to the destination directory where to move the contents of the source directory.
        num_threads: The number of threads to use for parallel file transfer. This number should be set depending on the
            type of transfer (local or remote) and is not guaranteed to provide improved transfer performance. For local
            transfers, setting this number above 1 will likely provide a performance boost. For remote transfers using
            a single TCP / IP socket (such as non-multichannel SMB protocol), the number should be set to 1.
        verify_integrity: Determines whether to perform integrity verification for the transferred files. Note,
            integrity verification is a time-consuming process and generally would not be a concern for most runtimes.
            Therefore, it is often fine to disable this option to optimize method runtime speed.

    Raises:
        RuntimeError: If the transferred files do not pass the xxHash3-128 checksum integrity verification.
    """
|