sl-shared-assets 4.0.0-py3-none-any.whl → 5.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of sl-shared-assets might be problematic.

Files changed (41)
  1. sl_shared_assets/__init__.py +45 -42
  2. sl_shared_assets/command_line_interfaces/__init__.py +3 -0
  3. sl_shared_assets/command_line_interfaces/configure.py +173 -0
  4. sl_shared_assets/command_line_interfaces/manage.py +226 -0
  5. sl_shared_assets/data_classes/__init__.py +33 -32
  6. sl_shared_assets/data_classes/configuration_data.py +267 -79
  7. sl_shared_assets/data_classes/runtime_data.py +11 -11
  8. sl_shared_assets/data_classes/session_data.py +226 -289
  9. sl_shared_assets/data_classes/surgery_data.py +6 -6
  10. sl_shared_assets/server/__init__.py +24 -4
  11. sl_shared_assets/server/job.py +6 -7
  12. sl_shared_assets/server/pipeline.py +570 -0
  13. sl_shared_assets/server/server.py +57 -25
  14. sl_shared_assets/tools/__init__.py +9 -8
  15. sl_shared_assets/tools/packaging_tools.py +14 -25
  16. sl_shared_assets/tools/project_management_tools.py +602 -523
  17. sl_shared_assets/tools/transfer_tools.py +88 -23
  18. {sl_shared_assets-4.0.0.dist-info → sl_shared_assets-5.0.0.dist-info}/METADATA +46 -202
  19. sl_shared_assets-5.0.0.dist-info/RECORD +23 -0
  20. sl_shared_assets-5.0.0.dist-info/entry_points.txt +3 -0
  21. sl_shared_assets/__init__.pyi +0 -91
  22. sl_shared_assets/cli.py +0 -500
  23. sl_shared_assets/cli.pyi +0 -106
  24. sl_shared_assets/data_classes/__init__.pyi +0 -75
  25. sl_shared_assets/data_classes/configuration_data.pyi +0 -235
  26. sl_shared_assets/data_classes/runtime_data.pyi +0 -157
  27. sl_shared_assets/data_classes/session_data.pyi +0 -379
  28. sl_shared_assets/data_classes/surgery_data.pyi +0 -89
  29. sl_shared_assets/server/__init__.pyi +0 -11
  30. sl_shared_assets/server/job.pyi +0 -205
  31. sl_shared_assets/server/server.pyi +0 -298
  32. sl_shared_assets/tools/__init__.pyi +0 -19
  33. sl_shared_assets/tools/ascension_tools.py +0 -265
  34. sl_shared_assets/tools/ascension_tools.pyi +0 -68
  35. sl_shared_assets/tools/packaging_tools.pyi +0 -58
  36. sl_shared_assets/tools/project_management_tools.pyi +0 -239
  37. sl_shared_assets/tools/transfer_tools.pyi +0 -53
  38. sl_shared_assets-4.0.0.dist-info/RECORD +0 -36
  39. sl_shared_assets-4.0.0.dist-info/entry_points.txt +0 -7
  40. {sl_shared_assets-4.0.0.dist-info → sl_shared_assets-5.0.0.dist-info}/WHEEL +0 -0
  41. {sl_shared_assets-4.0.0.dist-info → sl_shared_assets-5.0.0.dist-info}/licenses/LICENSE +0 -0
sl_shared_assets/server/server.py
@@ -1,4 +1,4 @@
- """This module provides the tools for working with the Sun lab BioHPC cluster. Specifically, the classes from this
+ """This module provides the tools for working with remote compute servers. Specifically, the classes from this
  module establish an API for submitting jobs to the shared data processing cluster (managed via SLURM) and monitoring
  the running job status. All lab processing and analysis pipelines use this interface for accessing shared compute
  resources.
@@ -27,20 +27,22 @@ def generate_server_credentials(
      output_directory: Path,
      username: str,
      password: str,
+     service: bool = False,
      host: str = "cbsuwsun.biopic.cornell.edu",
      storage_root: str = "/local/workdir",
      working_root: str = "/local/storage",
      shared_directory_name: str = "sun_data",
  ) -> None:
-     """Generates a new server_credentials.yaml file under the specified directory, using input information.
+     """Generates a new server access credentials .yaml file under the specified directory, using input information.

-     This function provides a convenience interface for generating new BioHPC server credential files. Generally, this is
-     only used when setting up new host-computers or users in the lab.
+     This function provides a convenience interface for generating new server access credential files. Depending on
+     configuration, it either creates user access credentials files or service access credentials files.

      Args:
          output_directory: The directory where to save the generated server_credentials.yaml file.
          username: The username to use for server authentication.
          password: The password to use for server authentication.
+         service: Determines whether the generated credentials file stores the data for a user or a service account.
          host: The hostname or IP address of the server to connect to.
          storage_root: The path to the root storage (slow) server directory. Typically, this is the path to the
              top-level (root) directory of the HDD RAID volume.
@@ -50,15 +52,26 @@ def generate_server_credentials(
          shared_directory_name: The name of the shared directory used to store all Sun lab project data on the storage
              and working server volumes.
      """
-     # noinspection PyArgumentList
-     ServerCredentials(
-         username=username,
-         password=password,
-         host=host,
-         storage_root=storage_root,
-         working_root=working_root,
-         shared_directory_name=shared_directory_name,
-     ).to_yaml(file_path=output_directory.joinpath("server_credentials.yaml"))
+     if service:
+         ServerCredentials(
+             username=username,
+             password=password,
+             host=host,
+             storage_root=storage_root,
+             working_root=working_root,
+             shared_directory_name=shared_directory_name,
+         ).to_yaml(file_path=output_directory.joinpath("service_credentials.yaml"))
+         console.echo(message="Service server access credentials file: Created.", level=LogLevel.SUCCESS)
+     else:
+         ServerCredentials(
+             username=username,
+             password=password,
+             host=host,
+             storage_root=storage_root,
+             working_root=working_root,
+             shared_directory_name=shared_directory_name,
+         ).to_yaml(file_path=output_directory.joinpath("user_credentials.yaml"))
+         console.echo(message="User server access credentials file: Created.", level=LogLevel.SUCCESS)


  @dataclass()
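
For illustration, a minimal usage sketch of the refactored function, based only on the signature shown above; the import path, directory, and account values are assumptions:

    from pathlib import Path

    # Assumed import path; the server package __init__ diff is not shown here.
    from sl_shared_assets.server import generate_server_credentials

    credentials_dir = Path("/home/labuser/.sl-assets")  # hypothetical location

    # The default (service=False) writes user_credentials.yaml.
    generate_server_credentials(
        output_directory=credentials_dir,
        username="labuser",
        password="not-a-real-password",
    )

    # service=True writes service_credentials.yaml instead.
    generate_server_credentials(
        output_directory=credentials_dir,
        username="pipeline-service",
        password="not-a-real-password",
        service=True,
    )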
@@ -111,11 +124,11 @@ class ServerCredentials(YamlConfig):


  class Server:
-     """Encapsulates access to the Sun lab BioHPC processing server.
+     """Encapsulates access to a Sun lab processing server.

-     This class provides the API that allows accessing the BioHPC server to create and submit various SLURM-managed jobs
-     to the server. It functions as the central interface used by all processing pipelines in the lab to execute costly
-     data processing on the server.
+     This class provides the API that allows accessing the remote processing server to create and submit various
+     SLURM-managed jobs to the server. It functions as the central interface used by all processing pipelines in the
+     lab to execute costly data processing on the server.

      Notes:
          All lab processing pipelines expect the data to be stored on the server and all processing logic to be packaged
@@ -306,7 +319,7 @@ class Server:
          # include connection data received from the server.
          return self.submit_job(job)  # type: ignore[return-value]

-     def submit_job(self, job: Job | JupyterJob) -> Job | JupyterJob:
+     def submit_job(self, job: Job | JupyterJob, verbose: bool = True) -> Job | JupyterJob:
          """Submits the input job to the managed BioHPC server via SLURM job manager.

          This method submits various jobs for execution via the SLURM-managed BioHPC cluster. As part of its runtime, the
@@ -315,6 +328,9 @@ class Server:

          Args:
              job: The Job object that contains all job data.
+             verbose: Determines whether to notify the user about non-error states of the job submission task. Typically,
+                 this is disabled when batch-submitting jobs (for example, as part of running a processing pipeline) and
+                 enabled when submitting single jobs.

          Returns:
              The job object whose 'job_id' attribute had been modified with the job ID if the job was successfully
@@ -323,7 +339,8 @@ class Server:
          Raises:
              RuntimeError: If job submission to the server fails.
          """
-         console.echo(message=f"Submitting '{job.job_name}' job to the remote server {self.host}...")
+         if verbose:
+             console.echo(message=f"Submitting '{job.job_name}' job to the remote server {self.host}...")

          # Generates a temporary shell script on the local machine. Uses tempfile to automatically remove the
          # local script as soon as it is uploaded to the server.
@@ -332,7 +349,7 @@ class Server:
          fixed_script_content = job.command_script

          # Creates a temporary script file locally and dumps translated command data into the file
-         with open(local_script_path, "w") as f:
+         with local_script_path.open("w") as f:
              f.write(fixed_script_content)

          # Uploads the command script to the server
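
This open() → Path.open() change, repeated in several hunks below, is behavior-preserving: pathlib.Path.open delegates to the built-in open, so the two spellings are interchangeable. A quick sketch with an illustrative path:

    from pathlib import Path

    script = Path("/tmp/job_script.sh")  # hypothetical path

    with open(script, "w") as f:  # old spelling
        f.write("#!/bin/bash\n")

    with script.open("w") as f:  # new, pathlib-idiomatic spelling
        f.write("#!/bin/bash\n")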
@@ -400,6 +417,9 @@ class Server:

              timer.delay_noblock(delay=5, allow_sleep=True)  # Waits for 5 seconds before checking again
          else:
+             # Aborts the job if the server is busy running other jobs
+             self.abort_job(job=job)
+
              # Only raises the timeout error if the while loop is not broken in 120 seconds
              message = (
                  f"Remote jupyter server job {job.job_name} with id {job.job_id} did not start within 120 seconds "
@@ -409,7 +429,8 @@ class Server:
              console.error(message, TimeoutError)
              raise TimeoutError(message)  # Fallback to appease mypy

-         console.echo(message=f"{job.job_name} job: Submitted to {self.host}.", level=LogLevel.SUCCESS)
+         if verbose:
+             console.echo(message=f"{job.job_name} job: Submitted to {self.host}.", level=LogLevel.SUCCESS)

          # Returns the updated job object
          return job
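
Taken together, the verbose changes let pipelines silence per-job chatter. A sketch of the intended call patterns, assuming hypothetical server and jobs objects:

    # Single job: the default verbose=True reports submission progress.
    job = server.submit_job(job)

    # Batch submission from a pipeline: suppress non-error notifications.
    for job in jobs:
        server.submit_job(job, verbose=False)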
@@ -603,7 +624,7 @@ class Server:
                  sftp.rmdir(str(remote_path))

          except Exception as e:
-             console.echo(f"Unable to remove the specified directory {remote_path}: {str(e)}", level=LogLevel.WARNING)
+             console.echo(f"Unable to remove the specified directory {remote_path}: {e!s}", level=LogLevel.WARNING)

      def create_directory(self, remote_path: Path, parents: bool = True) -> None:
          """Creates the specified directory tree on the managed remote server via SFTP.
@@ -672,13 +693,14 @@ class Server:
              # Checks if the target file or directory exists by trying to 'stat' it
              sftp.stat(str(remote_path))

-             # If the request does not err, returns True (file or directory exists)
-             return True
-
          # If the directory or file does not exist, returns False
          except FileNotFoundError:
              return False

+         else:
+             # If the request does not err, returns True (file or directory exists)
+             return True
+
      def close(self) -> None:
          """Closes the SSH connection to the server.

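Moving return True into an else clause is a common lint-driven cleanup (for example, Ruff's TRY300 rule): the else body runs only when the try block raises nothing, which keeps the success path out of the exception handler. A generic, self-contained sketch of the pattern:

    import os

    def local_exists(path: str) -> bool:
        try:
            os.stat(path)  # raises FileNotFoundError for missing paths
        except FileNotFoundError:
            return False
        else:
            # Runs only when the try block did not raise.
            return True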
@@ -723,3 +745,13 @@ class Server:
      def user(self) -> str:
          """Returns the username used to authenticate with the server."""
          return self._credentials.username
+
+     @property
+     def suite2p_configurations_directory(self) -> Path:
+         """Returns the absolute path to the shared directory that stores all sl-suite2p runtime configuration files."""
+         return self.raw_data_root.joinpath("suite2p_configurations")
+
+     @property
+     def dlc_projects_directory(self) -> Path:
+         """Returns the absolute path to the shared directory that stores all DeepLabCut projects."""
+         return self.raw_data_root.joinpath("deeplabcut_projects")
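
Assuming raw_data_root points at the shared raw-data root on the server (that attribute is not shown in this diff), the new properties compose paths like this hypothetical example:

    # Hypothetical values, assuming raw_data_root is /local/workdir/sun_data/raw_data:
    server.suite2p_configurations_directory  # -> .../raw_data/suite2p_configurations
    server.dlc_projects_directory            # -> .../raw_data/deeplabcut_projects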
sl_shared_assets/tools/__init__.py
@@ -1,22 +1,23 @@
  """This package provides helper tools used to automate routine operations, such as transferring or verifying the
  integrity of the data. The tools from this package are used by most other data processing libraries in the lab."""

- from .transfer_tools import transfer_directory
- from .ascension_tools import ascend_tyche_data
+ from .transfer_tools import delete_directory, transfer_directory
  from .packaging_tools import calculate_directory_checksum
  from .project_management_tools import (
      ProjectManifest,
-     resolve_p53_marker,
-     verify_session_checksum,
+     archive_session,
+     prepare_session,
+     resolve_checksum,
      generate_project_manifest,
  )

  __all__ = [
      "ProjectManifest",
-     "transfer_directory",
+     "archive_session",
      "calculate_directory_checksum",
-     "ascend_tyche_data",
-     "verify_session_checksum",
+     "delete_directory",
      "generate_project_manifest",
-     "resolve_p53_marker",
+     "prepare_session",
+     "resolve_checksum",
+     "transfer_directory",
  ]
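
Downstream code importing the removed names must migrate. The before/after imports implied by this hunk:

    # 4.0.0 (removed in 5.0.0):
    # from sl_shared_assets.tools import ascend_tyche_data, resolve_p53_marker, verify_session_checksum

    # 5.0.0:
    from sl_shared_assets.tools import (
        archive_session,
        delete_directory,
        prepare_session,
        resolve_checksum,
    )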
sl_shared_assets/tools/packaging_tools.py
@@ -1,5 +1,6 @@
- """This module provides methods for packaging session runtime data for transmission over the network. The methods from
- this module work in tandem with methods offered by transfer_tools.py to ensure the integrity of the transferred data.
+ """This module provides tools for packaging data for transmission. Although this module is primarily used when
+ transmitting data over the network, it also works for local (within-machine) transfers. The tools from
+ this module work in tandem with tools offered by transfer_tools.py to ensure the integrity of the transferred data.
  """

  import os
@@ -10,8 +11,6 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
  from tqdm import tqdm
  import xxhash

- from ..data_classes import TrackerFileNames
-
  # Defines a 'blacklist' set of files. Primarily, this list contains the service files that may change after the session
  # data has been acquired. Therefore, it does not make sense to include them in the checksum, as they do not reflect the
  # data that should remain permanently unchanged. Note, make sure all service files are added to this set!
@@ -19,22 +18,16 @@ _excluded_files = {
      "ax_checksum.txt",
      "ubiquitin.bin",
      "telomere.bin",
-     "p53.bin",
      "nk.bin",
  }

- # Extends the exclusion set to include all tracker .yaml files and their concurrent access .lock files.
- for name in tuple(TrackerFileNames):
-     _excluded_files.add(name)
-     _excluded_files.add(f"{name}.lock")
-

  def _calculate_file_checksum(base_directory: Path, file_path: Path) -> tuple[str, bytes]:
-     """Calculates xxHash3-128 checksum for a single file and its path relative to the base directory.
+     """Calculates xxHash3-128 checksum for the target file and its path relative to the base directory.

      This function is passed to parallel workers used by the calculate_directory_hash() method that iteratively
      calculates the checksum for all files inside a directory. Each call to this function returns the checksum for the
-     target file, which includes both the contents of the file and its path relative to the base directory.
+     target file, which reflects both the contents of the file and its path relative to the base directory.

      Args:
          base_directory: The path to the base (root) directory which is being checksummed by the main
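
The blacklist is presumably consulted when enumerating files to hash; a hypothetical filter illustrating the idea (not the module's actual traversal code):

    from pathlib import Path

    def _checksummed_files(directory: Path) -> list[Path]:
        # Skips service files that may change after acquisition, per _excluded_files.
        return [
            path
            for path in sorted(directory.rglob("*"))
            if path.is_file() and path.name not in _excluded_files
        ]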
@@ -55,7 +48,7 @@ def _calculate_file_checksum(base_directory: Path, file_path: Path) -> tuple[str, bytes]:

      # Extends the checksum to reflect the file data state. Uses 8 MB chunks to avoid excessive RAM hogging at the cost
      # of slightly reduced throughput.
-     with open(file_path, "rb") as f:
+     with file_path.open("rb") as f:
          for chunk in iter(lambda: f.read(1024 * 1024 * 8), b""):
              checksum.update(chunk)

@@ -71,18 +64,14 @@ def calculate_directory_checksum(
      """Calculates xxHash3-128 checksum for the input directory, which includes the data of all contained files and
      the directory structure information.

-     This function is used to generate a checksum for the raw_data directory of each experiment or training session.
-     Checksums are used to verify the session data integrity during transmission between the PC that acquired the data
-     and long-term storage locations, such as the Synology NAS or the BioHPC server. The function can be configured to
-     write the generated checksum as a hexadecimal string to the ax_checksum.txt file stored at the highest level of the
-     input directory.
+     Checksums are used to verify the data integrity during transmission within machines (from one storage volume to
+     another) and between machines. The function can be configured to write the generated checksum as a hexadecimal
+     string to the ax_checksum.txt file stored at the highest level of the input directory.

      Note:
-         This method uses multiprocessing to efficiently parallelize checksum calculation for multiple files. In
-         combination with xxHash3, this achieves a significant speedup over more common checksums, such as MD5 and
-         SHA256. Note that xxHash3 is not suitable for security purposes and is only used to ensure data integrity.
-
-         The method notifies the user about the checksum calculation process via the terminal.
+         This function uses multiprocessing to efficiently parallelize checksum calculation for multiple files. In
+         combination with xxHash3, this achieves a significant speedup over other common checksum options, such as MD5
+         and SHA256. Note that xxHash3 is not suitable for security purposes and is only used to ensure data integrity.

          The returned checksum accounts for both the contents of each file and the layout of the input directory
          structure.
@@ -145,8 +134,8 @@ def calculate_directory_checksum(

      # Writes the hash to ax_checksum.txt in the root directory
      if save_checksum:
-         checksum_path = directory / "ax_checksum.txt"
-         with open(checksum_path, "w") as f:
+         checksum_path = directory.joinpath("ax_checksum.txt")
+         with checksum_path.open("w") as f:
              f.write(checksum_hexstr)

      return checksum_hexstr
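
A minimal round-trip sketch: hash before transfer, persist ax_checksum.txt, and re-verify at the destination. Only the save_checksum keyword is confirmed by this diff; the positional directory argument and all paths are assumptions:

    from pathlib import Path

    source = Path("/data/session_001")          # hypothetical source directory
    destination = Path("/mnt/nas/session_001")  # hypothetical destination

    # Sender: hash the directory and persist ax_checksum.txt inside it.
    # (ax_checksum.txt itself is blacklisted, so it does not perturb the checksum.)
    sent = calculate_directory_checksum(source, save_checksum=True)

    # Receiver: recompute after transfer and compare against the stored value.
    received = calculate_directory_checksum(destination, save_checksum=False)
    stored = destination.joinpath("ax_checksum.txt").read_text().strip()
    assert sent == received == stored, "data changed during transfer"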