sl-shared-assets 2.0.1__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sl-shared-assets might be problematic.

Files changed (32)
  1. sl_shared_assets/__init__.py +17 -9
  2. sl_shared_assets/__init__.pyi +12 -8
  3. sl_shared_assets/cli.py +258 -21
  4. sl_shared_assets/cli.pyi +44 -5
  5. sl_shared_assets/data_classes/__init__.py +8 -3
  6. sl_shared_assets/data_classes/__init__.pyi +8 -4
  7. sl_shared_assets/data_classes/configuration_data.py +149 -30
  8. sl_shared_assets/data_classes/configuration_data.pyi +49 -11
  9. sl_shared_assets/data_classes/runtime_data.py +70 -49
  10. sl_shared_assets/data_classes/runtime_data.pyi +41 -33
  11. sl_shared_assets/data_classes/session_data.py +193 -253
  12. sl_shared_assets/data_classes/session_data.pyi +99 -116
  13. sl_shared_assets/data_classes/surgery_data.py +1 -1
  14. sl_shared_assets/server/__init__.py +2 -2
  15. sl_shared_assets/server/__init__.pyi +5 -2
  16. sl_shared_assets/server/job.py +229 -1
  17. sl_shared_assets/server/job.pyi +111 -0
  18. sl_shared_assets/server/server.py +431 -31
  19. sl_shared_assets/server/server.pyi +158 -15
  20. sl_shared_assets/tools/__init__.py +2 -1
  21. sl_shared_assets/tools/__init__.pyi +2 -0
  22. sl_shared_assets/tools/ascension_tools.py +9 -21
  23. sl_shared_assets/tools/ascension_tools.pyi +1 -1
  24. sl_shared_assets/tools/packaging_tools.py +2 -2
  25. sl_shared_assets/tools/project_management_tools.py +147 -41
  26. sl_shared_assets/tools/project_management_tools.pyi +45 -6
  27. {sl_shared_assets-2.0.1.dist-info → sl_shared_assets-3.0.0.dist-info}/METADATA +127 -13
  28. sl_shared_assets-3.0.0.dist-info/RECORD +36 -0
  29. {sl_shared_assets-2.0.1.dist-info → sl_shared_assets-3.0.0.dist-info}/entry_points.txt +2 -0
  30. sl_shared_assets-2.0.1.dist-info/RECORD +0 -36
  31. {sl_shared_assets-2.0.1.dist-info → sl_shared_assets-3.0.0.dist-info}/WHEEL +0 -0
  32. {sl_shared_assets-2.0.1.dist-info → sl_shared_assets-3.0.0.dist-info}/licenses/LICENSE +0 -0
sl_shared_assets/data_classes/session_data.pyi
@@ -1,84 +1,41 @@
+from enum import StrEnum
 from pathlib import Path
 from dataclasses import field, dataclass

 from _typeshed import Incomplete
 from ataraxis_data_structures import YamlConfig

-from .configuration_data import get_system_configuration_data as get_system_configuration_data
+from .configuration_data import (
+    AcquisitionSystems as AcquisitionSystems,
+    get_system_configuration_data as get_system_configuration_data,
+)

-_valid_session_types: Incomplete
+class SessionTypes(StrEnum):
+    """Defines the set of data acquisition session types supported by various data acquisition systems used in the
+    Sun lab.

-@dataclass()
-class VersionData(YamlConfig):
-    """Stores information about the versions of important Sun lab libraries used to acquire the session's data."""
-
-    python_version: str = ...
-    sl_experiment_version: str = ...
-
-@dataclass()
-class ProjectConfiguration(YamlConfig):
-    """Stores the project-specific configuration parameters that do not change between different animals and runtime
-    sessions.
-
-    An instance of this class is generated and saved as a .yaml file in the 'configuration' directory of each project
-    when it is created. After that, the stored data is reused for every runtime (training or experiment session) carried
-    out for each animal of the project. Additionally, a copy of the most actual configuration file is saved inside each
-    runtime session's 'raw_data' folder, providing seamless integration between the managed data and various Sun lab
-    (sl-) libraries.
+    A data acquisition session broadly encompasses a recording session carried out to either: acquire experiment data,
+    train the animal for the upcoming experiments, or to assess the quality of surgical or other pre-experiment
+    intervention.

     Notes:
-        Together with SessionData, this class forms the entry point for all interactions with the data acquired in the
-        Sun lab. The fields of this class are used to flexibly configure the runtime behavior of major data acquisition
-        (sl-experiment) and processing (sl-forgery) libraries, adapting them for any project in the lab.
+        This enumeration does not differentiate between different acquisition systems. Different acquisition systems
+        support different session types, and may not be suited for acquiring some of the session types listed in this
+        enumeration.
     """

-    project_name: str = ...
-    surgery_sheet_id: str = ...
-    water_log_sheet_id: str = ...
-    @classmethod
-    def load(cls, configuration_path: Path) -> ProjectConfiguration:
-        """Loads the project configuration parameters from the specified project_configuration.yaml file.
-
-        This method is called during each interaction with any runtime session's data, including the creation of a new
-        session.
-
-        Args:
-            configuration_path: The path to the project_configuration.yaml file from which to load the data.
-
-        Returns:
-            The initialized ProjectConfiguration instance that stores the configuration data for the target project.
-
-        Raise:
-            FileNotFoundError: If the specified configuration file does not exist or is not a valid YAML file.
-        """
-    def save(self, path: Path) -> None:
-        """Saves class instance data to disk as a project_configuration.yaml file.
-
-        This method is automatically called from the 'sl_experiment' library when a new project is created. After this
-        method's runtime, all future project initialization calls will use the load() method to reuse configuration data
-        saved to the .yaml file created by this method.
-
-        Args:
-            path: The path to the .yaml file to save the data to.
-        """
-    def _verify_data(self) -> None:
-        """Verifies the user-modified data loaded from the project_configuration.yaml file.
-
-        Since this class is explicitly designed to be modified by the user, this verification step is carried out to
-        ensure that the loaded data matches expectations. This reduces the potential for user errors to impact the
-        runtime behavior of the libraries using this class. This internal method is automatically called by the load()
-        method.
-
-        Raises:
-            ValueError: If the loaded data does not match expected formats or values.
-        """
+    LICK_TRAINING = "lick training"
+    RUN_TRAINING = "run training"
+    MESOSCOPE_EXPERIMENT = "mesoscope experiment"
+    WINDOW_CHECKING = "window checking"

 @dataclass()
 class RawData:
     """Stores the paths to the directories and files that make up the 'raw_data' session-specific directory.

-    The raw_data directory stores the data acquired during the session runtime before and after preprocessing. Since
-    preprocessing does not alter the data, any data in that folder is considered 'raw'.
+    The raw_data directory stores the data acquired during the session data acquisition runtime, before and after
+    preprocessing. Since preprocessing does not irreversibly alter the data, any data in that folder is considered
+    'raw,' even if preprocessing losslessly re-compresses the data for efficient transfer.

     Notes:
         Sun lab data management strategy primarily relies on keeping multiple redundant copies of the raw_data for
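
For illustration, the new SessionTypes enumeration replaces the former free-form session type strings with a StrEnum, so members compare equal to their string values. A minimal sketch (the import path is an assumption, not taken from the diff):

    # Import path assumed; adjust to wherever SessionTypes is re-exported.
    from sl_shared_assets.data_classes import SessionTypes

    session_type = SessionTypes.LICK_TRAINING
    assert session_type == "lick training"  # StrEnum members behave as plain strings
    assert SessionTypes("run training") is SessionTypes.RUN_TRAINING  # recoverable from values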
@@ -94,7 +51,6 @@ class RawData:
     session_descriptor_path: Path = ...
     hardware_state_path: Path = ...
     surgery_metadata_path: Path = ...
-    project_configuration_path: Path = ...
     session_data_path: Path = ...
     experiment_configuration_path: Path = ...
     mesoscope_positions_path: Path = ...
@@ -103,21 +59,24 @@ class RawData:
     checksum_path: Path = ...
     telomere_path: Path = ...
     ubiquitin_path: Path = ...
+    nk_path: Path = ...
     integrity_verification_tracker_path: Path = ...
-    version_data_path: Path = ...
     def resolve_paths(self, root_directory_path: Path) -> None:
         """Resolves all paths managed by the class instance based on the input root directory path.

-        This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
-        machine that instantiates the class.
+        This method is called each time the (wrapper) SessionData class is instantiated to regenerate the managed path
+        hierarchy on any machine that instantiates the class.

         Args:
-            root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
-                hierarchy, this has to point to a directory under the main /session, /animal, or /project directory of
-                the managed session.
+            root_directory_path: The path to the top-level directory of the session. Typically, this path is assembled
+                using the following hierarchy: root/project/animal/session_id
         """
     def make_directories(self) -> None:
-        """Ensures that all major subdirectories and the root directory exist, creating any missing directories."""
+        """Ensures that all major subdirectories and the root directory exist, creating any missing directories.
+
+        This method is called each time the (wrapper) SessionData class is instantiated and allowed to generate
+        missing data directories.
+        """

 @dataclass()
 class ProcessedData:
@@ -132,53 +91,52 @@ class ProcessedData:
     camera_data_path: Path = ...
     mesoscope_data_path: Path = ...
     behavior_data_path: Path = ...
-    job_logs_path: Path = ...
     suite2p_processing_tracker_path: Path = ...
-    dataset_formation_tracker_path: Path = ...
     behavior_processing_tracker_path: Path = ...
     video_processing_tracker_path: Path = ...
+    p53_path: Path = ...
     def resolve_paths(self, root_directory_path: Path) -> None:
         """Resolves all paths managed by the class instance based on the input root directory path.

-        This method is called each time the class is instantiated to regenerate the managed path hierarchy on any
-        machine that instantiates the class.
+        This method is called each time the (wrapper) SessionData class is instantiated to regenerate the managed path
+        hierarchy on any machine that instantiates the class.

         Args:
-            root_directory_path: The path to the top-level directory of the local hierarchy. Depending on the managed
-                hierarchy, this has to point to a directory under the main /session, /animal, or /project directory of
-                the managed session.
+            root_directory_path: The path to the top-level directory of the session. Typically, this path is assembled
+                using the following hierarchy: root/project/animal/session_id
         """
     def make_directories(self) -> None:
-        """Ensures that all major subdirectories and the root directory exist, creating any missing directories."""
+        """Ensures that all major subdirectories and the root directory exist, creating any missing directories.
+
+        This method is called each time the (wrapper) SessionData class is instantiated and allowed to generate
+        missing data directories.
+        """

 @dataclass
 class SessionData(YamlConfig):
-    """Stores and manages the data layout of a single training or experiment session acquired in the Sun lab.
-
-    The primary purpose of this class is to maintain the session data structure across all supported destinations and
-    during all processing stages. It generates the paths used by all other classes from all Sun lab libraries that
-    interact with the session's data from the point of its creation and until the data is integrated into an
-    analysis dataset.
+    """Stores and manages the data layout of a single Sun lab data acquisition session.

-    When necessary, the class can be used to either generate a new session or load the layout of an already existing
-    session. When the class is used to create a new session, it generates the new session's name using the current
-    UTC timestamp, accurate to microseconds. This ensures that each session name is unique and preserves the overall
-    session order.
+    The primary purpose of this class is to maintain the session data structure across all supported destinations and to
+    provide a unified data access interface shared by all Sun lab libraries. The class can be used to either generate a
+    new session or load the layout of an already existing session. When the class is used to create a new session, it
+    generates the new session's name using the current UTC timestamp, accurate to microseconds. This ensures that each
+    session 'name' is unique and preserves the overall session order.

     Notes:
         This class is specifically designed for working with the data from a single session, performed by a single
         animal under the specific experiment. The class is used to manage both raw and processed data. It follows the
-        data through acquisition, preprocessing and processing stages of the Sun lab data workflow. Together with
-        ProjectConfiguration class, this class serves as an entry point for all interactions with the managed session's
-        data.
+        data through acquisition, preprocessing and processing stages of the Sun lab data workflow. This class serves as
+        an entry point for all interactions with the managed session's data.
     """

     project_name: str
     animal_id: str
     session_name: str
-    session_type: str
-    acquisition_system: str
+    session_type: str | SessionTypes
+    acquisition_system: str | AcquisitionSystems
     experiment_name: str | None
+    python_version: str = ...
+    sl_experiment_version: str = ...
     raw_data: RawData = field(default_factory=Incomplete)
     processed_data: ProcessedData = field(default_factory=Incomplete)
     def __post_init__(self) -> None:
@@ -188,9 +146,11 @@ class SessionData(YamlConfig):
         cls,
         project_name: str,
         animal_id: str,
-        session_type: str,
+        session_type: SessionTypes | str,
         experiment_name: str | None = None,
         session_name: str | None = None,
+        python_version: str = "3.11.13",
+        sl_experiment_version: str = "2.0.0",
     ) -> SessionData:
         """Creates a new SessionData object and generates the new session's data structure on the local PC.

@@ -201,22 +161,27 @@ class SessionData(YamlConfig):
         To load an already existing session data structure, use the load() method instead.

         This method automatically dumps the data of the created SessionData instance into the session_data.yaml file
-        inside the root raw_data directory of the created hierarchy. It also finds and dumps other configuration
-        files, such as project_configuration.yaml, experiment_configuration.yaml, and system_configuration.yaml into
-        the same raw_data directory. This ensures that if the session's runtime is interrupted unexpectedly, the
-        acquired data can still be processed.
+        inside the root 'raw_data' directory of the created hierarchy. It also finds and dumps other configuration
+        files, such as experiment_configuration.yaml and system_configuration.yaml into the same 'raw_data'
+        directory. If the session's runtime is interrupted unexpectedly, the acquired data can still be processed
+        using these pre-saved class instances.

         Args:
-            project_name: The name of the project for which the data is acquired.
-            animal_id: The ID code of the animal for which the data is acquired.
-            session_type: The type of the session. Primarily, this determines how to read the session_descriptor.yaml
-                file. Valid options are 'Lick training', 'Run training', 'Window checking', or 'Experiment'.
-            experiment_name: The name of the experiment executed during managed session. This optional argument is only
-                used for 'Experiment' session types. It is used to find the experiment configuration .YAML file.
-            session_name: An optional session_name override. Generally, this argument should not be provided for most
+            project_name: The name of the project for which the session is carried out.
+            animal_id: The ID code of the animal participating in the session.
+            session_type: The type of the session. Has to be one of the supported session types exposed by the
+                SessionTypes enumeration.
+            experiment_name: The name of the experiment executed during the session. This optional argument is only
+                used for experiment sessions. Note! The name passed to this argument has to match the name of the
+                experiment configuration .yaml file.
+            session_name: An optional session name override. Generally, this argument should not be provided for most
                sessions. When provided, the method uses this name instead of generating a new timestamp-based name.
                This is only used during the 'ascension' runtime to convert old data structures to the modern
                lab standards.
+            python_version: The string that specifies the Python version used to collect session data. Has to be
+                specified using the major.minor.patch version format.
+            sl_experiment_version: The string that specifies the version of the sl-experiment library used to collect
+                session data. Has to be specified using the major.minor.patch version format.

         Returns:
             An initialized SessionData instance that stores the layout of the newly created session's data.
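
Taken together with the signature change above, new sessions are created roughly as follows. A hypothetical sketch: the import path and all argument values are assumptions, not taken from the package:

    from sl_shared_assets.data_classes import SessionData, SessionTypes  # import path assumed

    # The session name is auto-generated from the current UTC timestamp unless overridden.
    session = SessionData.create(
        project_name="example_project",            # hypothetical project
        animal_id="A001",                          # hypothetical animal
        session_type=SessionTypes.LICK_TRAINING,
        python_version="3.11.13",
        sl_experiment_version="2.0.0",
    )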
@@ -228,9 +193,9 @@ class SessionData(YamlConfig):
         """Loads the SessionData instance from the target session's session_data.yaml file.

         This method is used to load the data layout information of an already existing session. Primarily, this is used
-        when preprocessing or processing session data. Due to how SessionData is stored and used in the lab, this
-        method always loads the data layout from the session_data.yaml file stored inside the raw_data session
-        subfolder. Currently, all interactions with Sun lab data require access to the 'raw_data' folder.
+        when processing session data. Due to how SessionData is stored and used in the lab, this method always loads the
+        data layout from the session_data.yaml file stored inside the 'raw_data' session subfolder. Currently, all
+        interactions with Sun lab data require access to the 'raw_data' folder of each session.

         Notes:
             To create a new session, use the create() method instead.
@@ -250,11 +215,18 @@ class SessionData(YamlConfig):
         Raises:
             FileNotFoundError: If the 'session_data.yaml' file is not found under the session_path/raw_data/ subfolder.

+        """
+    def runtime_initialized(self) -> None:
+        """Ensures that the 'nk.bin' marker file is removed from the session's raw_data folder.
+
+        The 'nk.bin' marker is generated as part of the SessionData initialization (creation) process to mark sessions
+        that did not fully initialize during runtime. This service method is designed to be called by the sl-experiment
+        library classes to remove the 'nk.bin' marker when it is safe to do so. It should not be called by end-users.
         """
     def _save(self) -> None:
         """Saves the instance data to the 'raw_data' directory of the managed session as a 'session_data.yaml' file.

-        This is used to save the data stored in the instance to disk, so that it can be reused during preprocessing or
+        This is used to save the data stored in the instance to disk, so that it can be reused during further stages of
         data processing. The method is intended to only be used by the SessionData instance itself during its
         create() method runtime.
         """
@@ -274,6 +246,13 @@ class ProcessingTracker(YamlConfig):
     _is_running: bool = ...
     _lock_path: str = field(init=False)
     def __post_init__(self) -> None: ...
+    def __del__(self) -> None:
+        """If the instance is garbage-collected without calling the stop() method, assumes this is due to a runtime
+        error.
+
+        It is essential to always resolve the runtime as either 'stopped' or 'erred' to avoid deadlocking the session
+        data.
+        """
     def _load_state(self) -> None:
         """Reads the current processing state from the wrapped .YAML file."""
     def _save_state(self) -> None:
@@ -300,7 +279,11 @@ class ProcessingTracker(YamlConfig):
             TimeoutError: If the file lock for the target .YAML file cannot be acquired within the timeout period.
         """
     def stop(self) -> None:
-        """Mark processing as started.
+        """Configures the tracker file to indicate that the tracked processing runtime has been completed successfully.
+
+        After this method returns, it is UNSAFE to do any further processing from the process that calls this method.
+        Any process that calls the 'start' method of this class is expected to also call this method or 'error' method
+        at the end of the runtime.

         Raises:
             TimeoutError: If the file lock for the target .YAML file cannot be acquired within the timeout period.
@@ -308,12 +291,12 @@ class ProcessingTracker(YamlConfig):
     @property
     def is_complete(self) -> bool:
         """Returns True if the tracker wrapped by the instance indicates that the processing runtime has been completed
-        successfully and False otherwise."""
+        successfully at least once and that there is no ongoing processing that uses the target session."""
     @property
     def encountered_error(self) -> bool:
-        """Returns True if the tracker wrapped by the instance indicates that the processing runtime aborted due to
-        encountering an error and False otherwise."""
+        """Returns True if the tracker wrapped by the instance indicates that the processing runtime for the target
+        session has aborted due to encountering an error."""
     @property
     def is_running(self) -> bool:
         """Returns True if the tracker wrapped by the instance indicates that the processing runtime is currently
-        running and False otherwise."""
+        running for the target session."""
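
The start/stop/error contract documented above maps onto a try/except pattern. A sketch, assuming the tracker wraps one of the tracker .yaml files listed in ProcessedData and that the constructor argument name and the error() method match the docstring references (neither is shown in full here):

    tracker = ProcessingTracker(file_path=session.processed_data.suite2p_processing_tracker_path)  # argument name assumed
    tracker.start()
    try:
        run_suite2p_pipeline()  # stand-in for the actual processing logic
        tracker.stop()          # marks the runtime as completed successfully
    except Exception:
        tracker.error()         # the 'error' method referenced by the stop() docstring
        raise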
sl_shared_assets/data_classes/surgery_data.py
@@ -51,7 +51,7 @@ class ProcedureData:
     surgery_quality: int = 0
     """Stores the quality of the surgical intervention as a numeric level. 0 indicates unusable (bad) result, 1
     indicates usable result that is not good enough to be included in a publication, 2 indicates publication-grade
-    result."""
+    result, 3 indicates high-tier publication grade result."""


 @dataclass
sl_shared_assets/server/__init__.py
@@ -2,7 +2,7 @@
 and other compute servers. This package is also used across all Sun lab members private code to interface with the
 shared server."""

-from .job import Job
+from .job import Job, JupyterJob
 from .server import Server, ServerCredentials, generate_server_credentials

-__all__ = ["Server", "ServerCredentials", "generate_server_credentials", "Job"]
+__all__ = ["Server", "ServerCredentials", "generate_server_credentials", "Job", "JupyterJob"]
sl_shared_assets/server/__init__.pyi
@@ -1,8 +1,11 @@
-from .job import Job as Job
+from .job import (
+    Job as Job,
+    JupyterJob as JupyterJob,
+)
 from .server import (
     Server as Server,
     ServerCredentials as ServerCredentials,
     generate_server_credentials as generate_server_credentials,
 )

-__all__ = ["Server", "ServerCredentials", "generate_server_credentials", "Job"]
+__all__ = ["Server", "ServerCredentials", "generate_server_credentials", "Job", "JupyterJob"]
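
After these two changes, the public import surface of the server subpackage matches the updated __all__:

    from sl_shared_assets.server import Job, JupyterJob, Server, ServerCredentials, generate_server_credentials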
sl_shared_assets/server/job.py
@@ -1,13 +1,51 @@
 """This module provides the core Job class, used as the starting point for all SLURM-managed job executed on lab compute
 server(s). Specifically, the Job class acts as a wrapper around the SLURM configuration and specific logic of each
 job. During runtime, Server class interacts with input job objects to manage their transfer and execution on the
-remote servers."""
+remote servers.
+
+Since version 3.0.0, this module also provides the specialized JupyterJob class used to launch remote Jupyter
+notebook servers.
+"""

 # noinspection PyProtectedMember
+import re
 from pathlib import Path
 import datetime
+from dataclasses import dataclass

+# noinspection PyProtectedMember
 from simple_slurm import Slurm  # type: ignore
+from ataraxis_base_utilities import LogLevel, console
+
+
+@dataclass
+class _JupyterConnectionInfo:
+    """Stores the data used to establish the connection with a Jupyter notebook server running under SLURM control on a
+    remote Sun lab server.
+
+    More specifically, this class is used to transfer the connection metadata collected on the remote server back to
+    the local machine that requested the server to be established.
+    """
+
+    compute_node: str
+    """The hostname of the compute node where Jupyter is running."""
+
+    port: int
+    """The port number on which Jupyter is listening for communication. Usually, this is the default port 8888 or 9999.
+    """
+
+    token: str
+    """The authentication token for the Jupyter server. This token is used to authenticate the user when establishing
+    communication with the Jupyter server."""
+
+    @property
+    def localhost_url(self) -> str:
+        """Returns the localhost URL for connecting to the server.
+
+        To use this URL, first set up an SSH tunnel to the server via the specific Jupyter communication port and the
+        remote server access credentials.
+        """
+        return f"http://localhost:{self.port}/?token={self.token}"


 class Job:
@@ -138,3 +176,193 @@ class Job:

         # Returns the script content to caller as a string
         return fixed_script_content
+
+
+class JupyterJob(Job):
+    """Specialized Job instance designed to launch a Jupyter notebook server on SLURM.
+
+    This class extends the base Job class to include Jupyter-specific configuration and commands for starting a
+    notebook server in a SLURM environment. Using this specialized job allows users to set up remote Jupyter servers
+    while still benefitting from SLURM's job management and fair airtime policies.
+
+    Notes:
+        Jupyter servers directly compete for resources with headless data processing jobs. Therefore, it is important
+        to minimize the resource footprint and the runtime of each Jupyter server, if possible.
+
+    Args:
+        job_name: The descriptive name of the Jupyter SLURM job to be created. Primarily, this name is used in terminal
+            printouts to identify the job to human operators.
+        output_log: The absolute path to the .txt file on the processing server, where to store the standard output
+            data of the job.
+        error_log: The absolute path to the .txt file on the processing server, where to store the standard error
+            data of the job.
+        working_directory: The absolute path to the directory where temporary job files will be stored. During runtime,
+            classes from this library use that directory to store files such as the job's shell script. All such files
+            are automatically removed from the directory at the end of a non-errors runtime.
+        conda_environment: The name of the conda environment to activate on the server before running the job logic. The
+            environment should contain the necessary Python packages and CLIs to support running the job's logic. For
+            Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
+        port: The connection port number for Jupyter server. Do not change the default value unless you know what you
+            are doing, as the server has most common communication ports closed for security reasons.
+        notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have access to
+            items stored in or under this directory. For most runtimes, this should be set to the user's root data or
+            working directory.
+        cpus_to_use: The number of CPUs to allocate to the Jupyter server. Keep this value as small as possible to avoid
+            interfering with headless data processing jobs.
+        ram_gb: The amount of RAM, in GB, to allocate to the Jupyter server. Keep this value as small as possible to
+            avoid interfering with headless data processing jobs.
+        time_limit: The maximum Jupyter server uptime, in minutes. Set this to the expected duration of your jupyter
+            session.
+        jupyter_args: Stores additional arguments to pass to jupyter notebook initialization command.
+
+    Attributes:
+        port: Stores the connection port of the managed Jupyter server.
+        notebook_dir: Stores the absolute path to the directory used as Jupyter's root, relative to the remote server
+            root.
+        connection_info: Stores the JupyterConnectionInfo instance after the Jupyter server is instantiated.
+        host: Stores the hostname of the remote server.
+        user: Stores the username used to connect with the remote server.
+        connection_info_file: The absolute path to the file that stores connection information, relative to the remote
+            server root.
+        _command: Stores the shell command for launching the Jupyter server.
+    """
+
+    def __init__(
+        self,
+        job_name: str,
+        output_log: Path,
+        error_log: Path,
+        working_directory: Path,
+        conda_environment: str,
+        notebook_directory: Path,
+        port: int = 9999,  # Defaults to using port 9999
+        cpus_to_use: int = 2,  # Defaults to 2 CPU cores
+        ram_gb: int = 32,  # Defaults to 32 GB of RAM
+        time_limit: int = 120,  # Defaults to 2 hours of runtime (120 minutes)
+        jupyter_args: str = "",
+    ) -> None:
+        # Initializes parent Job class
+        super().__init__(
+            job_name=job_name,
+            output_log=output_log,
+            error_log=error_log,
+            working_directory=working_directory,
+            conda_environment=conda_environment,
+            cpus_to_use=cpus_to_use,
+            ram_gb=ram_gb,
+            time_limit=time_limit,
+        )
+
+        # Saves important jupyter configuration parameters to class attributes
+        self.port = port
+        self.notebook_dir = notebook_directory
+
+        # Similar to job ID, these attributes initialize to None and are reconfigured as part of the job submission
+        # process.
+        self.connection_info: _JupyterConnectionInfo | None = None
+        self.host: str | None = None
+        self.user: str | None = None
+
+        # Resolves the server-side path to the jupyter server connection info file.
+        self.connection_info_file = working_directory.joinpath(f"{job_name}_connection.txt")
+
+        # Builds Jupyter launch command.
+        self._build_jupyter_command(jupyter_args)
+
+    def _build_jupyter_command(self, jupyter_args: str) -> None:
+        """Builds the command to launch Jupyter notebook server on the remote Sun lab server."""
+
+        # Gets the hostname of the compute node and caches it in the connection data file. Also caches the port name.
+        self.add_command('echo "COMPUTE_NODE: $(hostname)" > {}'.format(self.connection_info_file))
+        self.add_command('echo "PORT: {}" >> {}'.format(self.port, self.connection_info_file))
+
+        # Generates a random access token for security and caches it in the connection data file.
+        self.add_command("TOKEN=$(openssl rand -hex 24)")
+        self.add_command('echo "TOKEN: $TOKEN" >> {}'.format(self.connection_info_file))
+
+        # Builds Jupyter startup command.
+        jupyter_cmd = [
+            "jupyter lab",
+            "--no-browser",
+            f"--port={self.port}",
+            "--ip=0.0.0.0",  # Listen on all interfaces
+            "--ServerApp.allow_origin='*'",  # Allow connections from SSH tunnel
+            "--ServerApp.allow_remote_access=True",  # Enable remote access
+            "--ServerApp.disable_check_xsrf=True",  # Helps with proxy connections
+            f"--ServerApp.root_dir={self.notebook_dir}",  # Root directory (not notebook-dir)
+            "--IdentityProvider.token=$TOKEN",  # Token authentication
+        ]
+
+        # Adds any additional arguments.
+        if jupyter_args:
+            jupyter_cmd.append(jupyter_args)
+
+        # Adds resolved jupyter command to the list of job commands.
+        jupyter_cmd_str = " ".join(jupyter_cmd)
+        self.add_command(jupyter_cmd_str)
+
+    def parse_connection_info(self, info_file: Path) -> None:
+        """Parses the connection information file created by the Jupyter job on the server.
+
+        Use this method to parse the connection file fetched from the server to finalize setting up the Jupyter
+        server job.
+
+        Args:
+            info_file: The path to the .txt file generated by the remote server that stores the Jupyter connection
+                information to be parsed.
+        """
+
+        with open(info_file, "r") as f:
+            content = f.read()
+
+        # Extracts information using regex
+        compute_node_match = re.search(r"COMPUTE_NODE: (.+)", content)
+        port_match = re.search(r"PORT: (\d+)", content)
+        token_match = re.search(r"TOKEN: (.+)", content)
+
+        if not all([compute_node_match, port_match, token_match]):
+            message = f"Could not parse connection information file for the Jupyter server job with id {self.job_id}."
+            console.error(message, ValueError)
+
+        # Stores extracted data inside connection_info attribute as a JupyterConnectionInfo instance.
+        self.connection_info = _JupyterConnectionInfo(
+            compute_node=compute_node_match.group(1).strip(),  # type: ignore
+            port=int(port_match.group(1)),  # type: ignore
+            token=token_match.group(1).strip(),  # type: ignore
+        )
+
+    def print_connection_info(self) -> None:
+        """Constructs and displays the command to set up the SSH tunnel to the server and the link to the localhost
+        server view in the terminal.
+
+        The SSH command should be used via a separate terminal or subprocess call to establish the secure SSH tunnel to
+        the Jupyter server. Once the SSH tunnel is established, the printed localhost url can be used to view the
+        server from the local machine.
+        """
+
+        # If connection information is not available, there is nothing to print
+        if self.connection_info is None:
+            console.echo(
+                message=(
+                    f"No connection information is available for the job {self.job_name}, which indicates that the job "
+                    f"has not been submitted to the server. Submit the job for execution to the remote Sun lab server "
+                    f"to generate the connection information"
+                ),
+                level=LogLevel.WARNING,
+            )
+            return  # No connection information available, so does not proceed with printing.
+
+        # Prints generic connection details to terminal
+        console.echo(f"Jupyter is running on: {self.connection_info.compute_node}")
+        console.echo(f"Port: {self.connection_info.port}")
+        console.echo(f"Token: {self.connection_info.token}")
+
+        # Constructs and displays the SSH tunnel command and the localhost url for connecting to the server
+        tunnel_cmd = (
+            f"ssh -N -L {self.connection_info.port}:{self.connection_info.compute_node}:{self.connection_info.port} "
+            f"{self.user}@{self.host}"
+        )
+        localhost_url = f"http://localhost:{self.connection_info.port}/?token={self.connection_info.token}"
+        print(f"\nTo access locally, run this in a terminal:")
+        print(tunnel_cmd)
+        print(f"\nThen open: {localhost_url}")
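
End to end, the new class supports the following workflow. A hypothetical sketch: the JupyterJob arguments mirror the signature above, but the paths, environment name, and the commented-out Server submission and file-retrieval calls are assumptions, not the real API:

    from pathlib import Path
    from sl_shared_assets.server import JupyterJob

    job = JupyterJob(
        job_name="jupyter_demo",                     # hypothetical values throughout
        output_log=Path("/home/user/logs/out.txt"),
        error_log=Path("/home/user/logs/err.txt"),
        working_directory=Path("/home/user/jobs"),
        conda_environment="analysis",                # must provide jupyter notebook / jupyterlab
        notebook_directory=Path("/home/user/data"),
        time_limit=60,                               # keep remote Jupyter servers short-lived
    )

    # Submission and connection-file retrieval go through the Server class, which also populates
    # job.host and job.user; the method names below are placeholders, not the real API:
    # server.submit(job)
    # server.retrieve(job.connection_info_file, Path("connection.txt"))

    job.parse_connection_info(Path("connection.txt"))  # populates job.connection_info
    job.print_connection_info()                        # prints the ssh -N -L tunnel command and localhost URL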