sl-shared-assets 2.0.0__py3-none-any.whl → 3.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic.

@@ -1,34 +1,42 @@
1
1
  from pathlib import Path
2
- from dataclasses import dataclass
2
+ from dataclasses import field, dataclass
3
3
 
4
+ from _typeshed import Incomplete
4
5
  from simple_slurm import Slurm as Slurm
5
6
  from paramiko.client import SSHClient as SSHClient
6
7
  from ataraxis_data_structures import YamlConfig
7
8
 
8
- from .job import Job as Job
9
+ from .job import (
10
+ Job as Job,
11
+ JupyterJob as JupyterJob,
12
+ )
9
13
 
10
14
  def generate_server_credentials(
11
15
  output_directory: Path,
12
16
  username: str,
13
17
  password: str,
14
18
  host: str = "cbsuwsun.biohpc.cornell.edu",
15
- raw_data_root: str = "/workdir/sun_data",
16
- processed_data_root: str = "/storage/sun_data",
19
+ storage_root: str = "/local/workdir",
20
+ working_root: str = "/local/storage",
21
+ shared_directory_name: str = "sun_data",
17
22
  ) -> None:
18
23
  """Generates a new server_credentials.yaml file under the specified directory, using input information.
19
24
 
20
25
  This function provides a convenience interface for generating new BioHPC server credential files. Generally, this is
21
- only used when setting up new host-computers in the lab.
26
+ only used when setting up new host-computers or users in the lab.
22
27
 
23
28
  Args:
24
29
  output_directory: The directory where to save the generated server_credentials.yaml file.
25
30
  username: The username to use for server authentication.
26
31
  password: The password to use for server authentication.
27
32
  host: The hostname or IP address of the server to connect to.
28
- raw_data_root: The path to the root directory used to store the raw data from all Sun lab projects on the
29
- server.
30
- processed_data_root: The path to the root directory used to store the processed data from all Sun lab projects
31
- on the server.
33
+ storage_root: The path to the root storage (slow) server directory. Typically, this is the path to the
34
+ top-level (root) directory of the HDD RAID volume.
35
+ working_root: The path to the root working (fast) server directory. Typically, this is the path to the
36
+ top-level (root) directory of the NVME RAID volume. If the server uses the same volume for both storage and
37
+ working directories, enter the same path under both 'storage_root' and 'working_root'.
38
+ shared_directory_name: The name of the shared directory used to store all Sun lab project data on the storage
39
+ and working server volumes.
32
40
  """
33
41
  @dataclass()
34
42
  class ServerCredentials(YamlConfig):
@@ -43,8 +51,15 @@ class ServerCredentials(YamlConfig):
43
51
  username: str = ...
44
52
  password: str = ...
45
53
  host: str = ...
46
- raw_data_root: str = ...
47
- processed_data_root: str = ...
54
+ storage_root: str = ...
55
+ working_root: str = ...
56
+ shared_directory_name: str = ...
57
+ raw_data_root: str = field(init=False, default_factory=Incomplete)
58
+ processed_data_root: str = field(init=False, default_factory=Incomplete)
59
+ user_data_root: str = field(init=False, default_factory=Incomplete)
60
+ user_working_root: str = field(init=False, default_factory=Incomplete)
61
+ def __post_init__(self) -> None:
62
+ """Statically resolves the paths to end-point directories using provided root directories."""
48
63
 
49
64
  class Server:
50
65
  """Encapsulates access to the Sun lab BioHPC processing server.
@@ -75,7 +90,79 @@ class Server:
75
90
  def __init__(self, credentials_path: Path) -> None: ...
76
91
  def __del__(self) -> None:
77
92
  """If the instance is connected to the server, terminates the connection before the instance is destroyed."""
78
- def submit_job(self, job: Job) -> Job:
93
+ def create_job(
94
+ self, job_name: str, conda_environment: str, cpus_to_use: int = 10, ram_gb: int = 10, time_limit: int = 60
95
+ ) -> Job:
96
+ """Creates and returns a new Job instance.
97
+
98
+ Use this method to generate Job objects for all headless jobs that need to be run on the remote server. The
99
+ generated Job is a precursor that requires further configuration by the user before it can be submitted to the
100
+ server for execution.
101
+
102
+ Args:
103
+ job_name: The descriptive name of the SLURM job to be created. Primarily, this name is used in terminal
104
+ printouts to identify the job to human operators.
105
+ conda_environment: The name of the conda environment to activate on the server before running the job logic.
106
+ The environment should contain the necessary Python packages and CLIs to support running the job's
107
+ logic.
108
+ cpus_to_use: The number of CPUs to use for the job.
109
+ ram_gb: The amount of RAM to allocate for the job, in Gigabytes.
110
+ time_limit: The maximum time limit for the job, in minutes. If the job is still running at the end of this
111
+ time period, it will be forcibly terminated. It is highly advised to always set adequate maximum runtime
112
+ limits to prevent jobs from hogging the server in case of runtime or algorithm errors.
113
+
114
+ Returns:
115
+ The initialized Job instance pre-filled with SLURM configuration data and conda activation commands. Modify
116
+ the returned instance with any additional commands as necessary for the job to fulfill its intended
117
+ purpose. Note, the Job requires submission via submit_job() to be executed by the server.
118
+ """
119
+ def launch_jupyter_server(
120
+ self,
121
+ job_name: str,
122
+ conda_environment: str,
123
+ notebook_directory: Path,
124
+ cpus_to_use: int = 2,
125
+ ram_gb: int = 32,
126
+ time_limit: int = 240,
127
+ port: int = 0,
128
+ jupyter_args: str = "",
129
+ ) -> JupyterJob:
130
+ """Launches a Jupyter notebook server on the target remote Sun lab server.
131
+
132
+ Use this method to run interactive Jupyter sessions on the remote server under SLURM control. Unlike
133
+ create_job(), this method automatically submits the job for execution as part of its runtime. Therefore, the
134
+ returned JupyterJob instance should only be used to query information about how to connect to the remote
135
+ Jupyter server.
136
+
137
+ Args:
138
+ job_name: The descriptive name of the Jupyter SLURM job to be created. Primarily, this name is used in
139
+ terminal printouts to identify the job to human operators.
140
+ conda_environment: The name of the conda environment to activate on the server before running the job logic.
141
+ The environment should contain the necessary Python packages and CLIs to support running the job's
142
+ logic. For Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
143
+ port: The connection port number for the Jupyter server. If set to 0 (default), a random port number between
144
+ 8888 and 9999 will be assigned to this connection to reduce the possibility of colliding with other
145
+ user sessions.
146
+ notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have GUI
147
+ access to items stored in or under this directory. For most runtimes, this should be set to the user's
148
+ root data or working directory.
149
+ cpus_to_use: The number of CPUs to allocate to the Jupyter server. Keep this value as small as possible to
150
+ avoid interfering with headless data processing jobs.
151
+ ram_gb: The amount of RAM, in GB, to allocate to the Jupyter server. Keep this value as small as possible to
152
+ avoid interfering with headless data processing jobs.
153
+ time_limit: The maximum Jupyter server uptime, in minutes. Set this to the expected duration of your Jupyter
154
+ session.
155
+ jupyter_args: Additional arguments to pass to the Jupyter notebook initialization command.
156
+
157
+ Returns:
158
+ The initialized JupyterJob instance that stores information on how to connect to the created Jupyter server.
159
+ Do NOT re-submit the job to the server, as this is done as part of this method's runtime.
160
+
161
+ Raises:
162
+ TimeoutError: If the target Jupyter server doesn't start within 120 minutes from this method being called.
163
+ RuntimeError: If job submission fails for any reason.
164
+ """
165
+ def submit_job(self, job: Job | JupyterJob) -> Job | JupyterJob:
79
166
  """Submits the input job to the managed BioHPC server via SLURM job manager.
80
167
 
81
168
  This method submits various jobs for execution via SLURM-managed BioHPC cluster. As part of its runtime, the
@@ -92,7 +179,7 @@ class Server:
92
179
  Raises:
93
180
  RuntimeError: If job submission to the server fails.
94
181
  """
95
- def job_complete(self, job: Job) -> bool:
182
+ def job_complete(self, job: Job | JupyterJob) -> bool:
96
183
  """Returns True if the job managed by the input Job instance has been completed or terminated its runtime due
97
184
  to an error.
98
185
 
@@ -105,6 +192,16 @@ class Server:
105
192
  ValueError: If the input Job object does not contain a valid job_id, suggesting that it has not been
106
193
  submitted to the server.
107
194
  """
195
+ def abort_job(self, job: Job | JupyterJob) -> None:
196
+ """Aborts the target job if it is currently running on the server.
197
+
198
+ Use this method to immediately abort running or queued jobs, without waiting for the timeout guard. If the job
199
+ is queued, this method will remove it from the SLURM queue. If the job is already terminated, this method will
200
+ do nothing.
201
+
202
+ Args:
203
+ job: The Job object that needs to be aborted.
204
+ """
108
205
  def pull_file(self, local_file_path: Path, remote_file_path: Path) -> None:
109
206
  """Moves the specified file from the remote server to the local machine.
110
207
 
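
A sketch (editorial, not part of the diff) of the headless job lifecycle documented above: create_job() builds the precursor, submit_job() dispatches it, job_complete() polls its state, and abort_job() cancels it. The job name, conda environment, polling interval, and local deadline are illustrative; the commands a Job carries are configured through the Job class API, which this diff does not show.

    import time

    # 'server' is a connected Server instance (see the credentials sketch above).
    job = server.create_job(
        job_name="suite2p_single_day",   # hypothetical job name
        conda_environment="suite2p",     # hypothetical conda environment
        cpus_to_use=16,
        ram_gb=64,
        time_limit=120,                  # minutes; the SLURM-side guard against runaway jobs
    )

    # ... configure the returned Job with the commands it should run (Job's API is outside this diff) ...

    job = server.submit_job(job)  # submission should populate the SLURM job_id used by job_complete()

    # Poll until the job finishes, cancelling it early if it exceeds a local deadline.
    deadline = time.monotonic() + 2 * 60 * 60
    while not server.job_complete(job):
        if time.monotonic() > deadline:
            server.abort_job(job)
            break
        time.sleep(30)
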
@@ -126,18 +223,50 @@ class Server:
126
223
  remote_path: The path to the file or directory on the remote server to be removed.
127
224
  is_dir: Determines whether the input path represents a directory or a file.
128
225
  """
226
+ def create_directory(self, remote_path: Path, parents: bool = True) -> None:
227
+ """Creates the specified directory tree on the managed remote server via SFTP.
228
+
229
+ This method creates directories on the remote server, with options to create parent directories and handle
230
+ existing directories gracefully.
231
+
232
+ Args:
233
+ remote_path: The absolute path to the directory to create on the remote server, specified from the server's
234
+ filesystem root.
235
+ parents: Determines whether to create parent directories, if they are missing. Otherwise, if parents do not
236
+ exist, raises a FileNotFoundError.
237
+
238
+ Notes:
239
+ If the target directory already exists, this method treats it as a successful runtime end-point and does
240
+ not raise an error.
241
+ """
242
+ def exists(self, remote_path: Path) -> bool:
243
+ """Returns True if the target file or directory exists on the remote server."""
129
244
  def close(self) -> None:
130
245
  """Closes the SSH connection to the server.
131
246
 
132
247
  This method has to be called before destroying the class instance to ensure proper resource cleanup.
133
248
  """
134
249
  @property
135
- def raw_data_root(self) -> str:
250
+ def raw_data_root(self) -> Path:
136
251
  """Returns the absolute path to the directory used to store the raw data for all Sun lab projects on the server
137
252
  accessible through this class.
138
253
  """
139
254
  @property
140
- def processed_data_root(self) -> str:
255
+ def processed_data_root(self) -> Path:
141
256
  """Returns the absolute path to the directory used to store the processed data for all Sun lab projects on the
142
257
  server accessible through this class.
143
258
  """
259
+ @property
260
+ def user_data_root(self) -> Path:
261
+ """Returns the absolute path to the directory used to store user-specific data on the server accessible through
262
+ this class."""
263
+ @property
264
+ def user_working_root(self) -> Path:
265
+ """Returns the absolute path to the user-specific working (fast) directory on the server accessible through
266
+ this class."""
267
+ @property
268
+ def host(self) -> str:
269
+ """Returns the hostname or IP address of the server accessible through this class."""
270
+ @property
271
+ def user(self) -> str:
272
+ """Returns the username used to authenticate with the server."""
@@ -4,7 +4,7 @@ integrity of the data. The tools from this package are used by most other data p
4
4
  from .transfer_tools import transfer_directory
5
5
  from .ascension_tools import ascend_tyche_data
6
6
  from .packaging_tools import calculate_directory_checksum
7
- from .project_management_tools import verify_session_checksum, generate_project_manifest
7
+ from .project_management_tools import resolve_p53_marker, verify_session_checksum, generate_project_manifest
8
8
 
9
9
  __all__ = [
10
10
  "transfer_directory",
@@ -12,4 +12,5 @@ __all__ = [
12
12
  "ascend_tyche_data",
13
13
  "verify_session_checksum",
14
14
  "generate_project_manifest",
15
+ "resolve_p53_marker",
15
16
  ]
@@ -2,6 +2,7 @@ from .transfer_tools import transfer_directory as transfer_directory
2
2
  from .ascension_tools import ascend_tyche_data as ascend_tyche_data
3
3
  from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
4
4
  from .project_management_tools import (
5
+ resolve_p53_marker as resolve_p53_marker,
5
6
  verify_session_checksum as verify_session_checksum,
6
7
  generate_project_manifest as generate_project_manifest,
7
8
  )
@@ -12,4 +13,5 @@ __all__ = [
12
13
  "ascend_tyche_data",
13
14
  "verify_session_checksum",
14
15
  "generate_project_manifest",
16
+ "resolve_p53_marker",
15
17
  ]
@@ -17,13 +17,12 @@ _excluded_files = {
17
17
  "ax_checksum.txt",
18
18
  "ubiquitin.bin",
19
19
  "telomere.bin",
20
+ "p53.bin",
20
21
  "suite2p_processing_tracker.yaml",
21
22
  "dataset_formation_tracker.yaml",
22
- "behavior_processing_tracker.yaml",
23
23
  "video_processing_tracker.yaml",
24
24
  "integrity_verification_tracker.yaml",
25
25
  "suite2p_processing_tracker.yaml.lock",
26
- "dataset_formation_tracker.yaml.lock",
27
26
  "behavior_processing_tracker.yaml.lock",
28
27
  "video_processing_tracker.yaml.lock",
29
28
  "integrity_verification_tracker.yaml.lock",
@@ -76,11 +76,11 @@ class ProjectManifest:
76
76
  "session",
77
77
  "type",
78
78
  "complete",
79
- "integrity_verification",
80
- "suite2p_processing",
81
- "behavior_processing",
82
- "video_processing",
83
- "dataset_formation",
79
+ "integrity",
80
+ "suite2p",
81
+ "behavior",
82
+ "video",
83
+ "dataset",
84
84
  ]
85
85
 
86
86
  # Retrieves the data
@@ -93,7 +93,7 @@ class ProjectManifest:
93
93
  animal = str(animal)
94
94
  else:
95
95
  animal = int(animal)
96
- df = df.filter(pl.col("animal") == animal)
96
+ df = df.filter(pl.col("animal") == animal)
97
97
 
98
98
  # Ensures the data displays properly
99
99
  with pl.Config(
@@ -157,7 +157,13 @@ class ProjectManifest:
157
157
  """
158
158
  return tuple(self._data.select("session").sort("session").to_series().to_list())
159
159
 
160
- def get_sessions_for_animal(self, animal: str | int, exclude_incomplete: bool = True) -> tuple[str, ...]:
160
+ def get_sessions_for_animal(
161
+ self,
162
+ animal: str | int,
163
+ exclude_incomplete: bool = True,
164
+ dataset_ready_only: bool = False,
165
+ not_dataset_ready_only: bool = False,
166
+ ) -> tuple[str, ...]:
161
167
  """Returns all session IDs for the target animal.
162
168
 
163
169
  This provides a tuple of all sessions performed by the target animal as part of the target project.
@@ -166,6 +172,11 @@ class ProjectManifest:
166
172
  animal: The ID of the animal for which to get the session data.
167
173
  exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
168
174
  list.
175
+ dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
176
+ the output list. Enabling this option only shows sessions that can be integrated into a dataset.
177
+ not_dataset_ready_only: The opposite of 'dataset_ready_only'. Determines whether to exclude sessions marked
178
+ as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
179
+ enabled, the 'dataset_ready_only' option takes precedence.
169
180
 
170
181
  Raises:
171
182
  ValueError: If the specified animal is not found in the manifest file.
@@ -188,6 +199,12 @@ class ProjectManifest:
188
199
  if exclude_incomplete:
189
200
  data = data.filter(pl.col("complete") == 1)
190
201
 
202
+ # Optionally filters sessions based on their readiness for dataset integration.
203
+ if dataset_ready_only: # Dataset-ready option always takes precedence
204
+ data = data.filter(pl.col("dataset") == 1)
205
+ elif not_dataset_ready_only:
206
+ data = data.filter(pl.col("dataset") == 0)
207
+
191
208
  # Formats and returns session IDs to the caller
192
209
  sessions = data.select("session").sort("session").to_series().to_list()
193
210
  return tuple(sessions)
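
A sketch (editorial, not part of the diff) of the new readiness filters on get_sessions_for_animal(). The import path is an assumption; the precedence matches the docstring, so when both flags are set, dataset_ready_only wins.

    from sl_shared_assets import ProjectManifest  # import path assumed


    def split_sessions_by_readiness(
        manifest: ProjectManifest, animal: str | int
    ) -> tuple[tuple[str, ...], tuple[str, ...]]:
        """Returns (dataset-ready, not-yet-ready) complete sessions for one animal."""
        ready = manifest.get_sessions_for_animal(animal, dataset_ready_only=True)
        pending = manifest.get_sessions_for_animal(animal, not_dataset_ready_only=True)
        return ready, pending
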
@@ -203,8 +220,8 @@ class ProjectManifest:
203
220
 
204
221
  Returns:
205
222
  A Polars DataFrame with the following columns: 'animal', 'date', 'notes', 'session', 'type', 'complete',
206
- 'intensity_verification', 'suite2p_processing', 'behavior_processing', 'video_processing',
207
- 'dataset_formation'.
223
+ 'integrity', 'suite2p', 'behavior', 'video',
224
+ 'dataset'.
208
225
  """
209
226
 
210
227
  df = self._data
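
A sketch (editorial, not part of the diff) of filtering the manifest DataFrame under the shortened 3.0.0 column names ('integrity', 'suite2p', 'behavior', 'video', 'dataset' instead of the *_verification / *_processing / *_formation variants). How the DataFrame is obtained from ProjectManifest is outside this hunk, so it is passed in as a parameter.

    import polars as pl


    def summarize_processing(df: pl.DataFrame) -> pl.DataFrame:
        """Counts fully processed and dataset-ready sessions per animal."""
        processed = df.filter(
            (pl.col("complete") == 1)
            & (pl.col("integrity") == 1)
            & (pl.col("suite2p") == 1)
            & (pl.col("behavior") == 1)
            & (pl.col("video") == 1)
        )
        return processed.group_by("animal").agg(
            pl.len().alias("processed_sessions"),
            pl.col("dataset").sum().alias("dataset_ready_sessions"),
        )
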
@@ -264,12 +281,12 @@ def generate_project_manifest(
264
281
  # Determines whether the session data is complete (ran for the intended duration and has all expected data).
265
282
  "complete": [],
266
283
  # Determines whether the session data integrity has been verified upon transfer to a storage machine.
267
- "integrity_verification": [],
268
- "suite2p_processing": [], # Determines whether the session has been processed with the single-day s2p pipeline.
284
+ "integrity": [],
285
+ "suite2p": [], # Determines whether the session has been processed with the single-day s2p pipeline.
269
286
  # Determines whether the session has been processed with the behavior extraction pipeline.
270
- "behavior_processing": [],
271
- "video_processing": [], # Determines whether the session has been processed with the DeepLabCut pipeline.
272
- "dataset_formation": [], # Determines whether the session's data has been integrated into a dataset.
287
+ "behavior": [],
288
+ "video": [], # Determines whether the session has been processed with the DeepLabCut pipeline.
289
+ "dataset": [], # Determines whether the session's data is ready to be integrated into a dataset.
273
290
  }
274
291
 
275
292
  # Loops over each session of every animal in the project and extracts session ID information and information
@@ -336,33 +353,34 @@ def generate_project_manifest(
336
353
 
337
354
  # Data verification status
338
355
  tracker = ProcessingTracker(file_path=session_data.raw_data.integrity_verification_tracker_path)
339
- manifest["integrity_verification"].append(tracker.is_complete)
356
+ manifest["integrity"].append(tracker.is_complete)
340
357
 
341
358
  # If the session is incomplete or unverified, marks all processing steps as FALSE, as automatic processing is
342
359
  # disabled for incomplete sessions. If the session is unverified, the case is even more severe, as its data may
343
360
  # be corrupted.
344
- if not manifest["complete"][-1] or not manifest["integrity_verification"][-1]:
345
- manifest["suite2p_processing"].append(False)
346
- manifest["dataset_formation"].append(False)
347
- manifest["behavior_processing"].append(False)
348
- manifest["video_processing"].append(False)
361
+ if not manifest["complete"][-1] or not manifest["integrity"][-1]:
362
+ manifest["suite2p"].append(False)
363
+ manifest["dataset"].append(False)
364
+ manifest["behavior"].append(False)
365
+ manifest["video"].append(False)
349
366
  continue # Cycles to the next session
350
367
 
351
- # Suite2p (single-day) status
368
+ # Suite2p (single-day) processing status.
352
369
  tracker = ProcessingTracker(file_path=session_data.processed_data.suite2p_processing_tracker_path)
353
- manifest["suite2p_processing"].append(tracker.is_complete)
370
+ manifest["suite2p"].append(tracker.is_complete)
354
371
 
355
- # Dataset formation (integration) status. Tracks whether the session has been added to any dataset(s).
356
- tracker = ProcessingTracker(file_path=session_data.processed_data.dataset_formation_tracker_path)
357
- manifest["dataset_formation"].append(tracker.is_complete)
358
-
359
- # Dataset formation (integration) status. Tracks whether the session has been added to any dataset(s).
372
+ # Behavior data processing status.
360
373
  tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
361
- manifest["behavior_processing"].append(tracker.is_complete)
374
+ manifest["behavior"].append(tracker.is_complete)
362
375
 
363
376
  # DeepLabCut (video) processing status.
364
377
  tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
365
- manifest["video_processing"].append(tracker.is_complete)
378
+ manifest["video"].append(tracker.is_complete)
379
+
380
+ # Tracks whether the session's data is ready for dataset integration. To be considered ready, the data must be
381
+ # successfully processed with all relevant pipelines. Any session currently being processed with any processing
382
+ # pipeline is considered NOT ready.
383
+ manifest["dataset"].append(session_data.processed_data.p53_path.exists())
366
384
 
367
385
  # If all animal IDs are integer-convertible, stores them as numbers to promote proper sorting. Otherwise, stores
368
386
  # them as strings. The latter options are primarily kept for compatibility with Tyche data
@@ -382,11 +400,11 @@ def generate_project_manifest(
382
400
  "type": pl.String,
383
401
  "notes": pl.String,
384
402
  "complete": pl.UInt8,
385
- "integrity_verification": pl.UInt8,
386
- "suite2p_processing": pl.UInt8,
387
- "dataset_formation": pl.UInt8,
388
- "behavior_processing": pl.UInt8,
389
- "video_processing": pl.UInt8,
403
+ "integrity": pl.UInt8,
404
+ "suite2p": pl.UInt8,
405
+ "dataset": pl.UInt8,
406
+ "behavior": pl.UInt8,
407
+ "video": pl.UInt8,
390
408
  }
391
409
  df = pl.DataFrame(manifest, schema=schema, strict=False)
392
410
 
@@ -468,3 +486,101 @@ def verify_session_checksum(
468
486
  # runtime finished with an error to prevent deadlocking the runtime.
469
487
  if tracker.is_running:
470
488
  tracker.error()
489
+
490
+
491
+ def resolve_p53_marker(
492
+ session_path: Path,
493
+ create_processed_data_directory: bool = True,
494
+ processed_data_root: None | Path = None,
495
+ remove: bool = False,
496
+ ) -> None:
497
+ """Depending on configuration, either creates or removes the p53.bin marker file for the target session.
498
+
499
+ The marker file statically determines whether the session can be targeted by data processing or dataset formation
500
+ pipelines.
501
+
502
+ Notes:
503
+ Since dataset integration relies on data processing outputs, it is essential to prevent processing pipelines
504
+ from altering the data while it is integrated into a dataset. The p53.bin marker solves this issue by ensuring
505
+ that only one type of runtime (processing or dataset integration) is allowed to work with the session.
506
+
507
+ For the p53.bin marker to be created, the session must not currently be undergoing any processing and must be
508
+ successfully processed with the minimal set of pipelines for its session type. Removing the p53.bin marker does
509
+ not have any dependencies and will be executed even if the session is currently undergoing dataset integration.
510
+ Due to this limitation, it is only possible to call this function with the 'remove' flag manually (via the
511
+ dedicated CLI).
512
+
513
+ Args:
514
+ session_path: The path to the session directory for which the p53.bin marker needs to be resolved. Note, the
515
+ input session directory must contain the 'raw_data' subdirectory.
516
+ create_processed_data_directory: Determines whether to create the processed data hierarchy during runtime.
517
+ processed_data_root: The root directory where to store the processed data hierarchy. This path has to point to
518
+ the root directory where to store the processed data from all projects, and it will be automatically
519
+ modified to include the project name, the animal name, and the session ID.
520
+ remove: Determines whether this function is called to create or remove the p53.bin marker.
521
+ """
522
+
523
+ # Loads session data layout. If configured to do so, also creates the processed data hierarchy
524
+ session_data = SessionData.load(
525
+ session_path=session_path,
526
+ processed_data_root=processed_data_root,
527
+ make_processed_data_directory=create_processed_data_directory,
528
+ )
529
+
530
+ # If the p53.bin marker exists and the runtime is configured to remove it, removes the marker file. If the runtime
531
+ # is configured to create the marker, aborts the runtime (as the marker already exists).
532
+ if session_data.processed_data.p53_path.exists():
533
+ if remove:
534
+ session_data.processed_data.p53_path.unlink()
535
+ return # Ends remove runtime
536
+
537
+ return # Ends create runtime
538
+
539
+ # If the marker does not exist and the function is called in 'remove' mode, aborts the runtime
540
+ elif remove:
541
+ return # Ends remove runtime
542
+
543
+ # The rest of the runtime deals with determining whether it is safe to create the marker file.
544
+ # Queries the type of the processed session
545
+ session_type = session_data.session_type
546
+
547
+ # If the session type is not supported, aborts with an error
548
+ if session_type not in _valid_session_types:
549
+ message = (
550
+ f"Unable to determine the mandatory processing pipelines for session {session_data.session_name} of animal "
551
+ f"{session_data.animal_id} and project {session_data.processed_data}. The type of the session "
552
+ f"{session_type} is not one of the supported session types: {', '.join(_valid_session_types)}."
553
+ )
554
+ console.error(message=message, error=ValueError)
555
+
556
+ # Window checking sessions are not designed to be integrated into datasets, so they cannot be marked with p53.bin
557
+ # file. Similarly, any incomplete session is automatically excluded from dataset formation.
558
+ if session_type == "window checking" or not session_data.raw_data.telomere_path.exists():
559
+ return
560
+
561
+ # Training sessions collect similar data and share processing pipeline requirements
562
+ if session_type == "lick training" or session_type == "run training":
563
+ # If the session has not been successfully processed with the behavior processing pipeline, aborts without
564
+ # creating the marker file. Also ensures that the video tracking pipeline is not actively running, although it
565
+ # is not required
566
+ behavior_tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
567
+ video_tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
568
+ if not behavior_tracker.is_complete or video_tracker.is_running:
569
+ # Note, training runtimes do not require suite2p processing.
570
+ return
571
+
572
+ # Mesoscope experiment sessions require additional processing with suite2p
573
+ if session_type == "mesoscope experiment":
574
+ behavior_tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
575
+ suite2p_tracker = ProcessingTracker(file_path=session_data.processed_data.suite2p_processing_tracker_path)
576
+ video_tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
577
+
578
+ # Similar to above, if the session is not processed with the behavior pipeline or the suite2p pipeline, aborts
579
+ # without creating the marker file. Video tracker is not required for p53 marker creation, but the video
580
+ # tracking pipeline must not be actively running.
581
+ if not behavior_tracker.is_complete or not suite2p_tracker.is_complete or video_tracker.is_running:
582
+ return
583
+
584
+ # If the runtime reached this point, the session is eligible for dataset integration. Creates the p53.bin marker
585
+ # file, preventing the session from being processed again as long as the marker exists.
586
+ session_data.processed_data.p53_path.touch()
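
A usage sketch (editorial, not part of the diff) for resolve_p53_marker(); the import path and the session path are assumptions based on the defaults and re-exports shown above.

    from pathlib import Path

    from sl_shared_assets import resolve_p53_marker  # exact import path assumed

    session = Path("/local/storage/sun_data/example_project/example_animal/2024-01-01-12-00-00")

    # Creates p53.bin only if the session's mandatory pipelines have completed and nothing is
    # currently processing it; otherwise the call returns without touching the session.
    resolve_p53_marker(session_path=session)

    # Removal performs no safety checks and is intended to be triggered manually via the dedicated CLI.
    resolve_p53_marker(session_path=session, remove=True)
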
@@ -69,7 +69,13 @@ class ProjectManifest:
69
69
  This provides a tuple of all sessions, independent of the participating animal, that were recorded as part
70
70
  of the target project.
71
71
  """
72
- def get_sessions_for_animal(self, animal: str | int, exclude_incomplete: bool = True) -> tuple[str, ...]:
72
+ def get_sessions_for_animal(
73
+ self,
74
+ animal: str | int,
75
+ exclude_incomplete: bool = True,
76
+ dataset_ready_only: bool = False,
77
+ not_dataset_ready_only: bool = False,
78
+ ) -> tuple[str, ...]:
73
79
  """Returns all session IDs for the target animal.
74
80
 
75
81
  This provides a tuple of all sessions performed by the target animal as part of the target project.
@@ -78,6 +84,11 @@ class ProjectManifest:
78
84
  animal: The ID of the animal for which to get the session data.
79
85
  exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
80
86
  list.
87
+ dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
88
+ the output list. Enabling this option only shows sessions that can be integrated into a dataset.
89
+ not_dataset_ready_only: The opposite of 'dataset_ready_only'. Determines whether to exclude sessions marked
90
+ as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
91
+ enabled, the 'dataset_ready_only' option takes precedence.
81
92
 
82
93
  Raises:
83
94
  ValueError: If the specified animal is not found in the manifest file.
@@ -93,8 +104,8 @@ class ProjectManifest:
93
104
 
94
105
  Returns:
95
106
  A Polars DataFrame with the following columns: 'animal', 'date', 'notes', 'session', 'type', 'complete',
96
- 'intensity_verification', 'suite2p_processing', 'behavior_processing', 'video_processing',
97
- 'dataset_formation'.
107
+ 'integrity', 'suite2p', 'behavior', 'video',
108
+ 'dataset'.
98
109
  """
99
110
 
100
111
  def generate_project_manifest(
@@ -146,3 +157,35 @@ def verify_session_checksum(
146
157
  the root directory where to store the processed data from all projects, and it will be automatically
147
158
  modified to include the project name, the animal name, and the session ID.
148
159
  """
160
+
161
+ def resolve_p53_marker(
162
+ session_path: Path,
163
+ create_processed_data_directory: bool = True,
164
+ processed_data_root: None | Path = None,
165
+ remove: bool = False,
166
+ ) -> None:
167
+ """Depending on configuration, either creates or removes the p53.bin marker file for the target session.
168
+
169
+ The marker file statically determines whether the session can be targeted by data processing or dataset formation
170
+ pipelines.
171
+
172
+ Notes:
173
+ Since dataset integration relies on data processing outputs, it is essential to prevent processing pipelines
174
+ from altering the data while it is integrated into a dataset. The p53.bin marker solves this issue by ensuring
175
+ that only one type of runtime (processing or dataset integration) is allowed to work with the session.
176
+
177
+ For the p53.bin marker to be created, the session must not currently be undergoing any processing and must be
178
+ successfully processed with the minimal set of pipelines for its session type. Removing the p53.bin marker does
179
+ not have any dependencies and will be executed even if the session is currently undergoing dataset integration.
180
+ Due to this limitation, it is only possible to call this function with the 'remove' flag manually (via the
181
+ dedicated CLI).
182
+
183
+ Args:
184
+ session_path: The path to the session directory for which the p53.bin marker needs to be resolved. Note, the
185
+ input session directory must contain the 'raw_data' subdirectory.
186
+ create_processed_data_directory: Determines whether to create the processed data hierarchy during runtime.
187
+ processed_data_root: The root directory where to store the processed data hierarchy. This path has to point to
188
+ the root directory where to store the processed data from all projects, and it will be automatically
189
+ modified to include the project name, the animal name, and the session ID.
190
+ remove: Determines whether this function is called to create or remove the p53.bin marker.
191
+ """
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sl-shared-assets
3
- Version: 2.0.0
4
- Summary: Stores assets shared between multiple Sun (NeuroAI) lab data pipelines.
3
+ Version: 3.0.0rc1
4
+ Summary: Provides data acquisition and processing assets shared between Sun (NeuroAI) lab libraries.
5
5
  Project-URL: Homepage, https://github.com/Sun-Lab-NBB/sl-shared-assets
6
6
  Project-URL: Documentation, https://sl-shared-assets-api-docs.netlify.app/
7
7
  Author: Ivan Kondratyev, Kushaan Gupta, Natalie Yeung
@@ -681,7 +681,7 @@ License: GNU GENERAL PUBLIC LICENSE
681
681
  Public License instead of this License. But first, please read
682
682
  <https://www.gnu.org/licenses/why-not-lgpl.html>.
683
683
  License-File: LICENSE
684
- Keywords: acquisition,assets,data,processing,sunlab
684
+ Keywords: acquisition,assets,data,processing,server,sunlab
685
685
  Classifier: Development Status :: 5 - Production/Stable
686
686
  Classifier: Intended Audience :: Developers
687
687
  Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
@@ -697,7 +697,7 @@ Requires-Dist: ataraxis-time==3.0.0
697
697
  Requires-Dist: click==8.2.1
698
698
  Requires-Dist: filelock==3.18.0
699
699
  Requires-Dist: natsort==8.4.0
700
- Requires-Dist: numpy<2.3.0,>=2.0.2
700
+ Requires-Dist: numpy==2.2.6
701
701
  Requires-Dist: paramiko==3.5.1
702
702
  Requires-Dist: polars==1.31.0
703
703
  Requires-Dist: pyarrow==20.0.0
@@ -725,7 +725,7 @@ Requires-Dist: appdirs==1.4.4; extra == 'condarun'
725
725
  Requires-Dist: click==8.2.1; extra == 'condarun'
726
726
  Requires-Dist: filelock==3.18.0; extra == 'condarun'
727
727
  Requires-Dist: natsort==8.4.0; extra == 'condarun'
728
- Requires-Dist: numpy<2.3.0,>=2.0.2; extra == 'condarun'
728
+ Requires-Dist: numpy==2.2.6; extra == 'condarun'
729
729
  Requires-Dist: paramiko==3.5.1; extra == 'condarun'
730
730
  Requires-Dist: polars==1.31.0; extra == 'condarun'
731
731
  Requires-Dist: pyarrow==20.0.0; extra == 'condarun'
@@ -858,7 +858,6 @@ We use [semantic versioning](https://semver.org/) for this project. For the vers
858
858
 
859
859
  - Ivan Kondratyev ([Inkaros](https://github.com/Inkaros))
860
860
  - Kushaan Gupta ([kushaangupta](https://github.com/kushaangupta))
861
- - Yuantao Deng ([YuantaoDeng](https://github.com/YuantaoDeng))
862
861
  - Natalie Yeung
863
862
 
864
863
  ___