sl-shared-assets 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic. Click here for more details.

@@ -0,0 +1,119 @@
1
+ """This module provides methods for moving session runtime data between the local machine, the ScanImage (Mesoscope) PC,
2
+ the Synology NAS drive, and the lab BioHPC server. All methods in this module expect that the destinations and sources
3
+ are mounted on the host file-system via the SMB or an equivalent protocol.
4
+ """
5
+
6
+ import shutil
7
+ from pathlib import Path
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+
10
+ from tqdm import tqdm
11
+ from ataraxis_base_utilities import console, ensure_directory_exists
12
+
13
+ from .packaging_tools import calculate_directory_checksum
14
+
15
+
16
def _transfer_file(source_file: Path, source_directory: Path, destination_directory: Path) -> None:
    """Copies the input file from the source directory to the destination directory while preserving the file metadata.

    This is a worker method used by the transfer_directory() method to copy multiple files in parallel.

    Notes:
        If the file is found under a hierarchy of subdirectories inside the input source_directory, that hierarchy will
        be preserved in the destination directory. Missing destination subdirectories are created on demand, so the
        worker does not depend on the caller recreating the hierarchy beforehand.

        The source file is left in place; this function only copies.

    Args:
        source_file: The file to be copied. Must be located under source_directory.
        source_directory: The root directory where the file is located.
        destination_directory: The root directory to which the file is copied.

    Raises:
        ValueError: If source_file is not located under source_directory.
    """
    relative = source_file.relative_to(source_directory)
    dest_file = destination_directory / relative

    # exist_ok=True keeps this call safe when several worker threads target the same parent directory.
    dest_file.parent.mkdir(parents=True, exist_ok=True)

    # copy2 copies the file contents and attempts to preserve the file metadata.
    shutil.copy2(source_file, dest_file)
33
+
34
+
35
def transfer_directory(source: Path, destination: Path, num_threads: int = 1, verify_integrity: bool = True) -> None:
    """Copies the contents of the input directory tree from source to destination while preserving the folder
    structure.

    This function is used to assemble the experimental data from all remote machines used in the acquisition process on
    the VRPC before the data is preprocessed. It is also used to transfer the preprocessed data from the VRPC to the
    SynologyNAS and the Sun lab BioHPC server.

    Notes:
        This method recreates the source directory hierarchy on the destination if the hierarchy does not exist. This
        is done before copying the files.

        The method executes a multithreading copy operation when num_threads is above 1 and a sequential copy
        otherwise. It does not clean up the source files. That job is handed to the specific preprocessing function
        from the sl_experiment or sl-forgery libraries that calls this function.

        If the method is configured to verify transferred file integrity, it reruns the xxHash3-128 checksum calculation
        on the destination and compares the returned checksum to the first line of the 'ax_checksum.txt' file stored
        in the source directory. The method assumes that all input directories contain the 'ax_checksum.txt' file that
        stores the 'source' directory checksum at the highest level of the input directory tree.

    Args:
        source: The path to the directory that needs to be copied.
        destination: The path to the destination directory to which the contents of the source directory are copied.
        num_threads: The number of threads to use for parallel file transfer. This number should be set depending on the
            type of transfer (local or remote) and is not guaranteed to provide improved transfer performance. For local
            transfers, setting this number above 1 will likely provide a performance boost. For remote transfers using
            a single TCP / IP socket (such as non-multichannel SMB protocol), the number should be set to 1.
        verify_integrity: Determines whether to perform integrity verification for the transferred files. Note,
            integrity verification is a time-consuming process and generally would not be a concern for most runtimes.
            Therefore, it is often fine to disable this option to optimize method runtime speed.

    Raises:
        FileNotFoundError: If the source path does not exist, or if integrity verification is enabled and the source
            directory does not contain the 'ax_checksum.txt' file.
        RuntimeError: If the transferred files do not pass the xxHash3-128 checksum integrity verification.
    """
    if not source.exists():
        message = f"Unable to move the directory {source}, as it does not exist."
        # NOTE(review): console.error is expected to raise the requested exception type after logging; confirm
        # against the ataraxis_base_utilities documentation.
        console.error(message=message, error=FileNotFoundError)

    # Ensures the destination root directory exists.
    ensure_directory_exists(destination)

    # Pairs every item (file or directory) of the source tree with its path relative to the source root, so that the
    # relative path is computed once per item.
    entries = ((item, item.relative_to(source)) for item in source.rglob("*"))

    # Processes the items from the shallowest to the deepest. Directories are recreated on the destination, everything
    # else is queued for copying.
    file_list = []
    for item, relative in sorted(entries, key=lambda entry: len(entry[1].parts)):
        if item.is_dir():
            (destination / relative).mkdir(parents=True, exist_ok=True)
        else:
            file_list.append(item)

    # Both copy modes share the same progress bar description.
    description = f"Transferring files to {Path(*destination.parts[-6:])}"

    # Copies the data to the destination. For parallel workflows, the method uses the ThreadPoolExecutor to copy
    # multiple files at the same time. Since I/O operations do not hold GIL, we do not need to parallelize with
    # Processes here.
    if num_threads > 1:
        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            futures = [executor.submit(_transfer_file, file, source, destination) for file in file_list]
            for future in tqdm(as_completed(futures), total=len(file_list), desc=description, unit="file"):
                # Propagates any exceptions from the file transfer.
                future.result()
    else:
        for file in tqdm(file_list, desc=description, unit="file"):
            _transfer_file(file, source, destination)

    # Verifies the integrity of the transferred directory by rerunning xxHash3-128 calculation.
    if verify_integrity:
        # Reads the reference checksum first, so that a missing checksum file is reported before the expensive
        # destination checksum calculation. The file is closed before any error is raised.
        with open(file=source.joinpath("ax_checksum.txt"), mode="r") as local_checksum:
            expected_checksum = local_checksum.readline().strip()

        destination_checksum = calculate_directory_checksum(directory=destination, batch=False, save_checksum=False)
        if destination_checksum != expected_checksum:
            message = (
                f"Checksum mismatch detected when transferring {Path(*source.parts[-6:])} to "
                f"{Path(*destination.parts[-6:])}! The data was likely corrupted in transmission. User intervention "
                f"required."
            )
            console.error(message=message, error=RuntimeError)
@@ -0,0 +1,53 @@
1
+ from pathlib import Path
2
+
3
+ from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
4
+
5
+ def _transfer_file(source_file: Path, source_directory: Path, destination_directory: Path) -> None:
6
+ """Copies the input file from the source directory to the destination directory while preserving the file metadata.
7
+
8
+ This is a worker method used by the transfer_directory() method to copy multiple files in parallel.
9
+
10
+ Notes:
11
+ If the file is found under a hierarchy of subdirectories inside the input source_directory, that hierarchy will
12
+ be preserved in the destination directory.
13
+
14
+ Args:
15
+ source_file: The file to be copied.
16
+ source_directory: The root directory where the file is located.
17
+ destination_directory: The root destination directory to which the file is copied.
18
+ """
19
+
20
+ def transfer_directory(source: Path, destination: Path, num_threads: int = 1, verify_integrity: bool = True) -> None:
21
+ """Copies the contents of the input directory tree from source to destination while preserving the folder
22
+ structure.
23
+
24
+ This function is used to assemble the experimental data from all remote machines used in the acquisition process on
25
+ the VRPC before the data is preprocessed. It is also used to transfer the preprocessed data from the VRPC to the
26
+ SynologyNAS and the Sun lab BioHPC server.
27
+
28
+ Notes:
29
+ This method recreates the source directory hierarchy on the destination if the hierarchy does not exist. This is
30
+ done before copying the files.
31
+
32
+ The method executes a multithreading copy operation. It does not clean up the source files. That job is handed
33
+ to the specific preprocessing function from the sl_experiment or sl-forgery libraries that calls this function.
34
+
35
+ If the method is configured to verify transferred file integrity, it reruns the xxHash3-128 checksum calculation
36
+ and compares the returned checksum to the one stored in the source directory. The method assumes that all input
37
+ directories contain the 'ax_checksum.txt' file that stores the 'source' directory checksum at the highest level
38
+ of the input directory tree.
39
+
40
+ Args:
41
+ source: The path to the directory that needs to be copied.
42
+ destination: The path to the destination directory to which the contents of the source directory are copied.
43
+ num_threads: The number of threads to use for parallel file transfer. This number should be set depending on the
44
+ type of transfer (local or remote) and is not guaranteed to provide improved transfer performance. For local
45
+ transfers, setting this number above 1 will likely provide a performance boost. For remote transfers using
46
+ a single TCP / IP socket (such as non-multichannel SMB protocol), the number should be set to 1.
47
+ verify_integrity: Determines whether to perform integrity verification for the transferred files. Note,
48
+ integrity verification is a time-consuming process and generally would not be a concern for most runtimes.
49
+ Therefore, it is often fine to disable this option to optimize method runtime speed.
50
+
51
+ Raises:
52
+ RuntimeError: If the transferred files do not pass the xxHash3-128 checksum integrity verification.
53
+ """