sl-shared-assets 2.0.0-py3-none-any.whl → 3.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic.

Files changed (32)
  1. sl_shared_assets/__init__.py +17 -9
  2. sl_shared_assets/__init__.pyi +12 -8
  3. sl_shared_assets/cli.py +266 -20
  4. sl_shared_assets/cli.pyi +46 -5
  5. sl_shared_assets/data_classes/__init__.py +8 -3
  6. sl_shared_assets/data_classes/__init__.pyi +8 -4
  7. sl_shared_assets/data_classes/configuration_data.py +149 -30
  8. sl_shared_assets/data_classes/configuration_data.pyi +49 -11
  9. sl_shared_assets/data_classes/runtime_data.py +70 -49
  10. sl_shared_assets/data_classes/runtime_data.pyi +41 -33
  11. sl_shared_assets/data_classes/session_data.py +193 -253
  12. sl_shared_assets/data_classes/session_data.pyi +99 -116
  13. sl_shared_assets/data_classes/surgery_data.py +1 -1
  14. sl_shared_assets/server/__init__.py +2 -2
  15. sl_shared_assets/server/__init__.pyi +5 -2
  16. sl_shared_assets/server/job.py +229 -1
  17. sl_shared_assets/server/job.pyi +111 -0
  18. sl_shared_assets/server/server.py +431 -31
  19. sl_shared_assets/server/server.pyi +158 -15
  20. sl_shared_assets/tools/__init__.py +2 -1
  21. sl_shared_assets/tools/__init__.pyi +2 -0
  22. sl_shared_assets/tools/ascension_tools.py +9 -21
  23. sl_shared_assets/tools/ascension_tools.pyi +1 -1
  24. sl_shared_assets/tools/packaging_tools.py +2 -2
  25. sl_shared_assets/tools/project_management_tools.py +147 -41
  26. sl_shared_assets/tools/project_management_tools.pyi +45 -6
  27. {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/METADATA +127 -13
  28. sl_shared_assets-3.0.0.dist-info/RECORD +36 -0
  29. {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/entry_points.txt +2 -0
  30. sl_shared_assets-2.0.0.dist-info/RECORD +0 -36
  31. {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/WHEEL +0 -0
  32. {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,34 +1,42 @@
  from pathlib import Path
- from dataclasses import dataclass
+ from dataclasses import field, dataclass
 
+ from _typeshed import Incomplete
  from simple_slurm import Slurm as Slurm
  from paramiko.client import SSHClient as SSHClient
  from ataraxis_data_structures import YamlConfig
 
- from .job import Job as Job
+ from .job import (
+     Job as Job,
+     JupyterJob as JupyterJob,
+ )
 
  def generate_server_credentials(
      output_directory: Path,
      username: str,
      password: str,
      host: str = "cbsuwsun.biohpc.cornell.edu",
-     raw_data_root: str = "/workdir/sun_data",
-     processed_data_root: str = "/storage/sun_data",
+     storage_root: str = "/local/workdir",
+     working_root: str = "/local/storage",
+     shared_directory_name: str = "sun_data",
  ) -> None:
      """Generates a new server_credentials.yaml file under the specified directory, using input information.
 
      This function provides a convenience interface for generating new BioHPC server credential files. Generally, this is
-     only used when setting up new host-computers in the lab.
+     only used when setting up new host-computers or users in the lab.
 
      Args:
          output_directory: The directory where to save the generated server_credentials.yaml file.
          username: The username to use for server authentication.
          password: The password to use for server authentication.
          host: The hostname or IP address of the server to connect to.
-         raw_data_root: The path to the root directory used to store the raw data from all Sun lab projects on the
-             server.
-         processed_data_root: The path to the root directory used to store the processed data from all Sun lab projects
-             on the server.
+         storage_root: The path to the root storage (slow) server directory. Typically, this is the path to the
+             top-level (root) directory of the HDD RAID volume.
+         working_root: The path to the root working (fast) server directory. Typically, this is the path to the
+             top-level (root) directory of the NVME RAID volume. If the server uses the same volume for both storage and
+             working directories, enter the same path under both 'storage_root' and 'working_root'.
+         shared_directory_name: The name of the shared directory used to store all Sun lab project data on the storage
+             and working server volumes.
      """
  @dataclass()
  class ServerCredentials(YamlConfig):
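For orientation, a minimal call against the updated 3.0.0 signature might look as follows. The output path and account values are placeholders, and the top-level import location is an assumption, not something this diff confirms:

    from pathlib import Path

    # Import location assumed; the function may also live in a submodule.
    from sl_shared_assets import generate_server_credentials

    # All account values below are placeholders; the root arguments mirror the
    # new 3.0.0 defaults shown in the hunk above.
    generate_server_credentials(
        output_directory=Path("/home/me/.sl_assets"),
        username="my_netid",
        password="my_password",
        host="cbsuwsun.biohpc.cornell.edu",
        storage_root="/local/workdir",
        working_root="/local/storage",
        shared_directory_name="sun_data",
    )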
@@ -43,8 +51,15 @@ class ServerCredentials(YamlConfig):
      username: str = ...
      password: str = ...
      host: str = ...
-     raw_data_root: str = ...
-     processed_data_root: str = ...
+     storage_root: str = ...
+     working_root: str = ...
+     shared_directory_name: str = ...
+     raw_data_root: str = field(init=False, default_factory=Incomplete)
+     processed_data_root: str = field(init=False, default_factory=Incomplete)
+     user_data_root: str = field(init=False, default_factory=Incomplete)
+     user_working_root: str = field(init=False, default_factory=Incomplete)
+     def __post_init__(self) -> None:
+         """Statically resolves the paths to end-point directories using provided root directories."""
 
  class Server:
      """Encapsulates access to the Sun lab BioHPC processing server.
@@ -75,7 +90,79 @@ class Server:
      def __init__(self, credentials_path: Path) -> None: ...
      def __del__(self) -> None:
          """If the instance is connected to the server, terminates the connection before the instance is destroyed."""
-     def submit_job(self, job: Job) -> Job:
+     def create_job(
+         self, job_name: str, conda_environment: str, cpus_to_use: int = 10, ram_gb: int = 10, time_limit: int = 60
+     ) -> Job:
+         """Creates and returns a new Job instance.
+
+         Use this method to generate Job objects for all headless jobs that need to be run on the remote server. The
+         generated Job is a precursor that requires further configuration by the user before it can be submitted to the
+         server for execution.
+
+         Args:
+             job_name: The descriptive name of the SLURM job to be created. Primarily, this name is used in terminal
+                 printouts to identify the job to human operators.
+             conda_environment: The name of the conda environment to activate on the server before running the job
+                 logic. The environment should contain the necessary Python packages and CLIs to support running the
+                 job's logic.
+             cpus_to_use: The number of CPUs to use for the job.
+             ram_gb: The amount of RAM to allocate for the job, in Gigabytes.
+             time_limit: The maximum time limit for the job, in minutes. If the job is still running at the end of this
+                 time period, it will be forcibly terminated. It is highly advised to always set adequate maximum
+                 runtime limits to prevent jobs from hogging the server in case of runtime or algorithm errors.
+
+         Returns:
+             The initialized Job instance pre-filled with SLURM configuration data and conda activation commands.
+             Modify the returned instance with any additional commands as necessary for the job to fulfill its intended
+             purpose. Note that the Job requires submission via submit_job() to be executed by the server.
+         """
+     def launch_jupyter_server(
+         self,
+         job_name: str,
+         conda_environment: str,
+         notebook_directory: Path,
+         cpus_to_use: int = 2,
+         ram_gb: int = 32,
+         time_limit: int = 240,
+         port: int = 0,
+         jupyter_args: str = "",
+     ) -> JupyterJob:
+         """Launches a Jupyter notebook server on the target remote Sun lab server.
+
+         Use this method to run interactive Jupyter sessions on the remote server under SLURM control. Unlike
+         create_job(), this method automatically submits the job for execution as part of its runtime. Therefore, the
+         returned JupyterJob instance should only be used to query information about how to connect to the remote
+         Jupyter server.
+
+         Args:
+             job_name: The descriptive name of the Jupyter SLURM job to be created. Primarily, this name is used in
+                 terminal printouts to identify the job to human operators.
+             conda_environment: The name of the conda environment to activate on the server before running the job
+                 logic. The environment should contain the necessary Python packages and CLIs to support running the
+                 job's logic. For Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
+             port: The connection port number for the Jupyter server. If set to 0 (default), a random port number
+                 between 8888 and 9999 will be assigned to this connection to reduce the possibility of colliding with
+                 other user sessions.
+             notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have GUI
+                 access to items stored in or under this directory. For most runtimes, this should be set to the user's
+                 root data or working directory.
+             cpus_to_use: The number of CPUs to allocate to the Jupyter server. Keep this value as small as possible to
+                 avoid interfering with headless data processing jobs.
+             ram_gb: The amount of RAM, in GB, to allocate to the Jupyter server. Keep this value as small as possible
+                 to avoid interfering with headless data processing jobs.
+             time_limit: The maximum Jupyter server uptime, in minutes. Set this to the expected duration of your
+                 Jupyter session.
+             jupyter_args: Additional arguments to pass to the Jupyter notebook initialization command.
+
+         Returns:
+             The initialized JupyterJob instance that stores information on how to connect to the created Jupyter
+             server. Do NOT re-submit the job to the server, as this is done as part of this method's runtime.
+
+         Raises:
+             TimeoutError: If the target Jupyter server doesn't start within 120 minutes of this method being called.
+             RuntimeError: If job submission fails for any reason.
+         """
+     def submit_job(self, job: Job | JupyterJob) -> Job | JupyterJob:
          """Submits the input job to the managed BioHPC server via SLURM job manager.
 
          This method submits various jobs for execution via SLURM-managed BioHPC cluster. As part of its runtime, the
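Read together, these stubs imply a create → configure → submit → poll lifecycle for headless work. A sketch under stated assumptions: the Server constructor and the create_job()/submit_job()/job_complete() signatures come from this diff, while the top-level import and the command-appending call on Job are hypothetical, since Job's own API is not part of this diff:

    import time
    from pathlib import Path

    from sl_shared_assets import Server  # top-level export assumed

    server = Server(credentials_path=Path("server_credentials.yaml"))

    # Returned pre-filled with SLURM configuration and conda activation commands.
    job = server.create_job(
        job_name="suite2p_run",
        conda_environment="processing",
        cpus_to_use=10,
        ram_gb=10,
        time_limit=60,  # minutes; the docstring advises always setting a realistic cap
    )

    # Hypothetical: attach the actual work. Job's mutation API lives in
    # server/job.py, which this diff does not include.
    # job.add_command("sl-process-session ...")

    job = server.submit_job(job)  # returns the Job updated with its SLURM job id

    while not server.job_complete(job):  # True once the job finished or errored
        time.sleep(30)

    server.close()  # required before the instance is destroyed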
@@ -92,7 +179,7 @@ class Server:
          Raises:
              RuntimeError: If job submission to the server fails.
          """
-     def job_complete(self, job: Job) -> bool:
+     def job_complete(self, job: Job | JupyterJob) -> bool:
          """Returns True if the job managed by the input Job instance has been completed or terminated its runtime due
          to an error.
 
@@ -105,6 +192,16 @@
              ValueError: If the input Job object does not contain a valid job_id, suggesting that it has not been
                  submitted to the server.
          """
+     def abort_job(self, job: Job | JupyterJob) -> None:
+         """Aborts the target job if it is currently running on the server.
+
+         Use this method to immediately abort running or queued jobs, without waiting for the timeout guard. If the job
+         is queued, this method will remove it from the SLURM queue. If the job is already terminated, this method will
+         do nothing.
+
+         Args:
+             job: The Job object that needs to be aborted.
+         """
      def pull_file(self, local_file_path: Path, remote_file_path: Path) -> None:
          """Moves the specified file from the remote server to the local machine.
 
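Because launch_jupyter_server() submits its job internally, the returned JupyterJob is for connection bookkeeping only, and abort_job() gives a clean early shutdown. A sketch; the notebook path is a placeholder, and the JupyterJob attributes that hold the connection details are not named anywhere in this diff:

    from pathlib import Path

    from sl_shared_assets import Server  # top-level export assumed

    server = Server(credentials_path=Path("server_credentials.yaml"))

    jupyter = server.launch_jupyter_server(
        job_name="analysis_notebook",
        conda_environment="analysis",  # must include the notebook and jupyterlab packages
        notebook_directory=Path("/local/storage/sun_data/my_netid"),  # placeholder
        time_limit=240,  # minutes of expected interactive use
        port=0,          # 0 picks a random port between 8888 and 9999
    )

    # Already submitted: do NOT pass the returned job to submit_job() again.
    # Inspect the JupyterJob for connection details (attribute names not shown here).
    print(jupyter)

    # Tear the session down early instead of waiting for the time limit.
    server.abort_job(jupyter)
    server.close()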
@@ -119,6 +216,20 @@
              local_file_path: The path to the file that needs to be copied to the remote server.
              remote_file_path: The path to the file on the remote server (where to copy the file).
          """
+     def pull_directory(self, local_directory_path: Path, remote_directory_path: Path) -> None:
+         """Recursively downloads the entire target directory from the remote server to the local machine.
+
+         Args:
+             local_directory_path: The path to the local directory where the remote directory will be copied.
+             remote_directory_path: The path to the directory on the remote server to be downloaded.
+         """
+     def push_directory(self, local_directory_path: Path, remote_directory_path: Path) -> None:
+         """Recursively uploads the entire target directory from the local machine to the remote server.
+
+         Args:
+             local_directory_path: The path to the local directory to be uploaded.
+             remote_directory_path: The path on the remote server where the directory will be copied.
+         """
      def remove(self, remote_path: Path, is_dir: bool) -> None:
          """Removes the specified file or directory from the remote server.
 
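The new directory-level helpers mirror the single-file transfer methods above and make round-tripping whole session folders a two-call affair. A sketch with placeholder paths and an assumed top-level import:

    from pathlib import Path

    from sl_shared_assets import Server  # top-level export assumed

    server = Server(credentials_path=Path("server_credentials.yaml"))

    # Upload an acquired session for processing (both paths are placeholders).
    server.push_directory(
        local_directory_path=Path("D:/sessions/2024-07-01-12-00-00"),
        remote_directory_path=Path("/local/workdir/sun_data/Tyche/mouse_011/2024-07-01-12-00-00"),
    )

    # Later, download the processed output back to the local machine.
    server.pull_directory(
        local_directory_path=Path("D:/sessions/2024-07-01-12-00-00/processed"),
        remote_directory_path=Path("/local/storage/sun_data/Tyche/mouse_011/2024-07-01-12-00-00"),
    )
    server.close()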
@@ -126,18 +237,50 @@
          remote_path: The path to the file or directory on the remote server to be removed.
          is_dir: Determines whether the input path represents a directory or a file.
          """
+     def create_directory(self, remote_path: Path, parents: bool = True) -> None:
+         """Creates the specified directory tree on the managed remote server via SFTP.
+
+         This method creates directories on the remote server, with options to create parent directories and handle
+         existing directories gracefully.
+
+         Args:
+             remote_path: The absolute path to the directory to create on the remote server, relative to the server
+                 root.
+             parents: Determines whether to create parent directories, if they are missing. Otherwise, if parents do not
+                 exist, raises a FileNotFoundError.
+
+         Notes:
+             This method silently assumes that it is fine if the directory already exists and treats it as a successful
+             runtime end-point.
+         """
+     def exists(self, remote_path: Path) -> bool:
+         """Returns True if the target file or directory exists on the remote server."""
      def close(self) -> None:
          """Closes the SSH connection to the server.
 
          This method has to be called before destroying the class instance to ensure proper resource cleanup.
          """
      @property
-     def raw_data_root(self) -> str:
+     def raw_data_root(self) -> Path:
          """Returns the absolute path to the directory used to store the raw data for all Sun lab projects on the server
          accessible through this class.
          """
      @property
-     def processed_data_root(self) -> str:
+     def processed_data_root(self) -> Path:
          """Returns the absolute path to the directory used to store the processed data for all Sun lab projects on the
          server accessible through this class.
          """
+     @property
+     def user_data_root(self) -> Path:
+         """Returns the absolute path to the directory used to store user-specific data on the server accessible through
+         this class."""
+     @property
+     def user_working_root(self) -> Path:
+         """Returns the absolute path to the user-specific working (fast) directory on the server accessible through
+         this class."""
+     @property
+     def host(self) -> str:
+         """Returns the hostname or IP address of the server accessible through this class."""
+     @property
+     def user(self) -> str:
+         """Returns the username used to authenticate with the server."""
@@ -4,7 +4,7 @@ integrity of the data. The tools from this package are used by most other data p
  from .transfer_tools import transfer_directory
  from .ascension_tools import ascend_tyche_data
  from .packaging_tools import calculate_directory_checksum
- from .project_management_tools import verify_session_checksum, generate_project_manifest
+ from .project_management_tools import resolve_p53_marker, verify_session_checksum, generate_project_manifest
 
  __all__ = [
      "transfer_directory",
@@ -12,4 +12,5 @@ __all__ = [
      "ascend_tyche_data",
      "verify_session_checksum",
      "generate_project_manifest",
+     "resolve_p53_marker",
  ]
@@ -2,6 +2,7 @@ from .transfer_tools import transfer_directory as transfer_directory
  from .ascension_tools import ascend_tyche_data as ascend_tyche_data
  from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
  from .project_management_tools import (
+     resolve_p53_marker as resolve_p53_marker,
      verify_session_checksum as verify_session_checksum,
      generate_project_manifest as generate_project_manifest,
  )
@@ -12,4 +13,5 @@ __all__ = [
      "ascend_tyche_data",
      "verify_session_checksum",
      "generate_project_manifest",
+     "resolve_p53_marker",
  ]
@@ -7,10 +7,10 @@ from pathlib import Path
  import datetime
 
  import numpy as np
- from ataraxis_base_utilities import LogLevel, console, ensure_directory_exists
+ from ataraxis_base_utilities import LogLevel, console
  from ataraxis_time.time_helpers import extract_timestamp_from_bytes
 
- from ..data_classes import SessionData, ProjectConfiguration, get_system_configuration_data
+ from ..data_classes import SessionData, SessionTypes, get_system_configuration_data
  from .transfer_tools import transfer_directory
  from .packaging_tools import calculate_directory_checksum
 
@@ -194,26 +194,12 @@ def ascend_tyche_data(root_directory: Path) -> None:
          root_directory: The directory that stores one or more Tyche animal folders. This can be conceptualized as the
              root directory for the Tyche project.
      """
-     # Generates a (shared) project configuration file.
-     project_configuration = ProjectConfiguration()
-
      # The acquisition system config resolves most paths and filesystem configuration arguments
      acquisition_system = get_system_configuration_data()
-     output_root_directory = acquisition_system.paths.root_directory
      server_root_directory = acquisition_system.paths.server_storage_directory
 
      # Statically defines project name and local root paths
      project_name = "Tyche"
-     project_configuration.project_name = project_name
-
-     # Uses nonsensical google sheet IDs. Tyche project did not use Google Sheet processing like our modern projects do.
-     project_configuration.water_log_sheet_id = "1xFh9Q2zT7pL3mVkJdR8bN6yXoE4wS5aG0cHu2Kf7D3v"
-     project_configuration.surgery_sheet_id = "1xFh9Q2zT7pL3mVkJdR8bN6yXoE4wS5aG0cHu2Kf7D3v"
-
-     # Dumps project configuration into the 'configuration' subfolder of the Tyche project.
-     configuration_path = output_root_directory.joinpath("Tyche", "configuration", "project_configuration.yaml")
-     ensure_directory_exists(configuration_path)
-     project_configuration.save(path=configuration_path)
 
      # Assumes that root directory stores all animal folders to be processed
      for animal_folder in root_directory.iterdir():
@@ -230,17 +216,19 @@ def ascend_tyche_data(root_directory: Path) -> None:
          # This procedure generates timestamp-based session names, analogous to how our modern pipeline does it.
          session_name = _generate_session_name(acquisition_path=acquisition_folder)
 
-         # Uses derived session name and the statically created project configuration file to create the
-         # session data hierarchy using the output root. This generates a 'standard' Sun lab directory structure
-         # for the Tyche data.
+         # Uses derived session name and the derived project name to create the session data hierarchy using the
+         # output root. This generates a 'standard' Sun lab directory structure for the Tyche data.
          session_data = SessionData.create(
-             project_name=project_configuration.project_name,
+             project_name=project_name,
              session_name=session_name,
              animal_id=animal_name,
-             session_type="mesoscope experiment",
+             session_type=SessionTypes.MESOSCOPE_EXPERIMENT,
              experiment_name=None,
          )
 
+         # Since this runtime reprocesses already acquired data, marks the session as fully initialized.
+         session_data.runtime_initialized()
+
          # Moves the data from the old hierarchy to the new hierarchy. If the process runs as expected, and
          # fully empties the source acquisition folder, destroys the folder. Otherwise, notifies the user that
          # the runtime did not fully process the session data and requests intervention.
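The migration now passes a SessionTypes member where 2.0.0 used the raw "mesoscope experiment" string. The enum itself lives in data_classes and is not shown in this diff; a plausible minimal shape, with the one member value inferred from the removed literal:

    from enum import Enum

    class SessionTypes(str, Enum):
        # Value inferred from the string literal this diff replaces; any other
        # members are hypothetical and omitted here.
        MESOSCOPE_EXPERIMENT = "mesoscope experiment"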
@@ -2,7 +2,7 @@ from pathlib import Path
  from ..data_classes import (
      SessionData as SessionData,
-     ProjectConfiguration as ProjectConfiguration,
+     SessionTypes as SessionTypes,
      get_system_configuration_data as get_system_configuration_data,
  )
  from .transfer_tools import transfer_directory as transfer_directory
@@ -17,13 +17,13 @@ _excluded_files = {
      "ax_checksum.txt",
      "ubiquitin.bin",
      "telomere.bin",
+     "p53.bin",
+     "nk.bin",
      "suite2p_processing_tracker.yaml",
      "dataset_formation_tracker.yaml",
-     "behavior_processing_tracker.yaml",
      "video_processing_tracker.yaml",
      "integrity_verification_tracker.yaml",
      "suite2p_processing_tracker.yaml.lock",
-     "dataset_formation_tracker.yaml.lock",
      "behavior_processing_tracker.yaml.lock",
      "video_processing_tracker.yaml.lock",
      "integrity_verification_tracker.yaml.lock",