PyPI - sl-shared-assets - Versions diffs - 4.0.1__py3-none-any.whl → 5.0.0__py3-none-any.whl - Mend

sl-shared-assets 4.0.1 (py3-none-any.whl) → 5.0.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sl-shared-assets might be problematic. Click here for more details.

Files changed (39) hide show

sl_shared_assets/__init__.py +45 -42
sl_shared_assets/command_line_interfaces/__init__.py +3 -0
sl_shared_assets/command_line_interfaces/configure.py +173 -0
sl_shared_assets/command_line_interfaces/manage.py +226 -0
sl_shared_assets/data_classes/__init__.py +33 -32
sl_shared_assets/data_classes/configuration_data.py +267 -79
sl_shared_assets/data_classes/session_data.py +226 -289
sl_shared_assets/server/__init__.py +24 -4
sl_shared_assets/server/job.py +6 -7
sl_shared_assets/server/pipeline.py +570 -0
sl_shared_assets/server/server.py +57 -25
sl_shared_assets/tools/__init__.py +9 -8
sl_shared_assets/tools/packaging_tools.py +14 -25
sl_shared_assets/tools/project_management_tools.py +602 -523
sl_shared_assets/tools/transfer_tools.py +88 -23
{sl_shared_assets-4.0.1.dist-info → sl_shared_assets-5.0.0.dist-info}/METADATA +46 -203
sl_shared_assets-5.0.0.dist-info/RECORD +23 -0
sl_shared_assets-5.0.0.dist-info/entry_points.txt +3 -0
sl_shared_assets/__init__.pyi +0 -91
sl_shared_assets/cli.py +0 -501
sl_shared_assets/cli.pyi +0 -106
sl_shared_assets/data_classes/__init__.pyi +0 -75
sl_shared_assets/data_classes/configuration_data.pyi +0 -235
sl_shared_assets/data_classes/runtime_data.pyi +0 -157
sl_shared_assets/data_classes/session_data.pyi +0 -379
sl_shared_assets/data_classes/surgery_data.pyi +0 -89
sl_shared_assets/server/__init__.pyi +0 -11
sl_shared_assets/server/job.pyi +0 -205
sl_shared_assets/server/server.pyi +0 -298
sl_shared_assets/tools/__init__.pyi +0 -19
sl_shared_assets/tools/ascension_tools.py +0 -265
sl_shared_assets/tools/ascension_tools.pyi +0 -68
sl_shared_assets/tools/packaging_tools.pyi +0 -58
sl_shared_assets/tools/project_management_tools.pyi +0 -239
sl_shared_assets/tools/transfer_tools.pyi +0 -53
sl_shared_assets-4.0.1.dist-info/RECORD +0 -36
sl_shared_assets-4.0.1.dist-info/entry_points.txt +0 -7
{sl_shared_assets-4.0.1.dist-info → sl_shared_assets-5.0.0.dist-info}/WHEEL +0 -0
{sl_shared_assets-4.0.1.dist-info → sl_shared_assets-5.0.0.dist-info}/licenses/LICENSE +0 -0

sl_shared_assets/data_classes/session_data.py CHANGED Viewed

@@ -7,12 +7,10 @@ libraries use these classes to work with all lab-generated data."""
 import copy
 from enum import StrEnum
-from random import randint
 import shutil as sh
 from pathlib import Path
 from dataclasses import field, dataclass
-from xxhash import xxh3_64
 from filelock import FileLock
 from ataraxis_base_utilities import LogLevel, console, ensure_directory_exists
 from ataraxis_data_structures import YamlConfig
@@ -48,26 +46,6 @@ class SessionTypes(StrEnum):
     activity data."""
-class TrackerFileNames(StrEnum):
-    """Defines a set of processing tacker .yaml files supported by various Sun lab data preprocessing, processing, and
-    dataset formation pipelines.
-    This enumeration standardizes the names for all processing tracker files used in the lab. It is designed to be used
-    via the get_processing_tracker() function to generate ProcessingTracker instances.
-    """
-    BEHAVIOR = "behavior_processing_tracker.yaml"
-    """This file is used to track the state of the behavior log processing pipeline."""
-    SUITE2P = "suite2p_processing_tracker.yaml"
-    """This file is used to track the state of the single-day suite2p processing pipeline."""
-    DATASET = "dataset_formation_tracker.yaml"
-    """This file is used to track the state of the dataset formation pipeline."""
-    VIDEO = "video_processing_tracker.yaml"
-    """This file is used to track the state of the video (DeepLabCut) processing pipeline."""
-    INTEGRITY = "integrity_verification_tracker.yaml"
-    """This file is used to track the state of the data integrity verification pipeline."""
 @dataclass()
 class RawData:
     """Stores the paths to the directories and files that make up the 'raw_data' session-specific directory.
@@ -155,6 +133,10 @@ class RawData:
     runtime initialization. Since runtime initialization is a complex process that may encounter a runtime error, the
     marker is used to discover sessions that failed to initialize. Since uninitialized sessions by definition do not
     contain any valuable data, they are marked for immediate deletion from all managed destinations."""
+    root_path: Path = Path()
+    """Stores the path to the root directory of the volume that stores raw data from all Sun lab projects. Primarily,
+    this is necessary for pipelines working with the data on the remote compute server to efficiently move it between
+    storage and working (processing) volumes."""
     def resolve_paths(self, root_directory_path: Path) -> None:
         """Resolves all paths managed by the class instance based on the input root directory path.
@@ -186,6 +168,10 @@ class RawData:
         self.ubiquitin_path = self.raw_data_path.joinpath("ubiquitin.bin")
         self.nk_path = self.raw_data_path.joinpath("nk.bin")
+        # Infers the path to the root raw data directory under which the session's project is stored. This assumes that
+        # the raw_data directory is found under root/project/animal/session_id/raw_data
+        self.root_path = root_directory_path.parents[3]
     def make_directories(self) -> None:
         """Ensures that all major subdirectories and the root directory exist, creating any missing directories.
@@ -220,12 +206,10 @@ class ProcessedData:
     behavior_data_path: Path = Path()
     """Stores the path to the directory that contains the non-video and non-brain-activity data extracted from
     .npz log files by the sl-behavior log processing pipeline."""
-    p53_path: Path = Path()
-    """Stores the path to the p53.bin file. This file serves as a lock-in marker that determines whether the session is
-    in the processing or dataset state. Specifically, if the file does not exist, the session data cannot be integrated
-    into any dataset, as it may be actively worked on by processing pipelines. Conversely, if the marker exists,
-    processing pipelines are not allowed to work with the session, as it may be actively integrated into one or more
-    datasets."""
+    root_path: Path = Path()
+    """Stores the path to the root directory of the volume that stores processed data from all Sun lab projects.
+    Primarily, this is necessary for pipelines working with the data on the remote compute server to efficiently move it
+    between storage and working (processing) volumes."""
     def resolve_paths(self, root_directory_path: Path) -> None:
         """Resolves all paths managed by the class instance based on the input root directory path.
@@ -242,7 +226,10 @@ class ProcessedData:
         self.camera_data_path = self.processed_data_path.joinpath("camera_data")
         self.mesoscope_data_path = self.processed_data_path.joinpath("mesoscope_data")
         self.behavior_data_path = self.processed_data_path.joinpath("behavior_data")
-        self.p53_path = self.processed_data_path.joinpath("p53.bin")
+        # Infers the path to the root processed data directory under which the session's project is stored. This
+        # assumes that the processed_data directory is found under root/project/animal/session_id/processed_data
+        self.root_path = root_directory_path.parents[3]
     def make_directories(self) -> None:
         """Ensures that all major subdirectories and the root directory exist, creating any missing directories.
@@ -256,6 +243,48 @@ class ProcessedData:
         ensure_directory_exists(self.behavior_data_path)
+@dataclass()
+class TrackingData:
+    """Stores the paths to the directories and files that make up the 'tracking_data' session-specific directory.
+    The 'tracking_data' directory was added in version 5.0.0 to store the ProcessingTracker instance data and .lock
+    files for pipelines and tasks used to work with session data after acquisition.
+    """
+    tracking_data_path: Path = Path()
+    """Stores the path to the root tracking_data directory of the session. This directory stores the .yaml
+    ProcessingTracker files and the .lock FileLock files that jointly ensure that session's data is accessed in a
+    process- and thread-safe way while being processed by multiple different processes and pipelines."""
+    session_lock_path: Path = Path()
+    """Stores the path to the session_lock.yaml file for the session. This file is used to ensure that only a single
+    manager process has exclusive access to the session's data on the remote compute server. This ensures that multiple
+    data processing pipelines can safely run for the same session without compromising session data integrity. This
+    file is intended to be used through the SessionLock class."""
+    def resolve_paths(self, root_directory_path: Path) -> None:
+        """Resolves all paths managed by the class instance based on the input root directory path.
+        This method is called each time the (wrapper) SessionData class is instantiated to regenerate the managed path
+        hierarchy on any machine that instantiates the class.
+        Args:
+            root_directory_path: The path to the top-level directory of the session. Typically, this path is assembled
+                using the following hierarchy: root/project/animal/session_id
+        """
+        # Generates the managed paths
+        self.tracking_data_path = root_directory_path
+        self.session_lock_path = self.tracking_data_path.joinpath("session_lock.yaml")
+    def make_directories(self) -> None:
+        """Ensures that all major subdirectories and the root directory exist, creating any missing directories.
+        This method is called each time the (wrapper) SessionData class is instantiated and allowed to generate
+        missing data directories.
+        """
+        ensure_directory_exists(self.tracking_data_path)
 @dataclass
 class SessionData(YamlConfig):
     """Stores and manages the data layout of a single Sun lab data acquisition session.
@@ -297,21 +326,44 @@ class SessionData(YamlConfig):
     """Stores the version of the sl-experiment library that was used to acquire the session data."""
     raw_data: RawData = field(default_factory=lambda: RawData())
     """Stores absolute paths to all directories and files that jointly make the session's raw data hierarchy. This
-    directory structure is resolved for each machine that creates or loads the SessionData class to ensure that all
-    Sun lab data can be accessed via the same API on any destination."""
+    hierarchy is initially resolved by the acquisition system that acquires the session and used to store all data
+    acquired during the session runtime."""
     processed_data: ProcessedData = field(default_factory=lambda: ProcessedData())
     """Stores absolute paths to all directories and files that jointly make the session's processed data hierarchy.
-    Typically, this hierarchy is only used on the lab's processing server(s), but it can also be used to run local
-    testing on end-user machines."""
+    Processed data encompasses all data generated from the raw data as part of data processing."""
+    source_data: RawData = field(default_factory=lambda: RawData())
+    """Stores absolute paths to the same data as the 'raw_data' field, but with all paths resolved relative to the
+    'processed_data' root. On systems that use the same root for processed and raw data, the source and raw directories
+    are identical. On systems that use different root directories for processed and raw data, the source and raw
+    directories are different. This is used to optimize data processing on the remote compute server by temporarily
+    copying all session data to the fast processed data volume."""
+    archived_data: ProcessedData = field(default_factory=lambda: ProcessedData())
+    """Similar to the 'source_data' field, stores the absolute path to the same data as the 'processed_data' field, but
+    with all paths resolved relative to the 'raw_data' root. This path is used as part of the session data archiving
+    process to collect all session data (raw and processed) on the slow 'storage' volume of the remote compute server.
+    """
+    tracking_data: TrackingData = field(default_factory=lambda: TrackingData())
+    """Stores absolute paths to all directories and files that jointly make the session's tracking data hierarchy. This
+    hierarchy is used during all stages of data processing to track the processing progress and ensure only a single
+    manager process can modify the session's data at any given time, ensuring access safety."""
     def __post_init__(self) -> None:
-        """Ensures raw_data and processed_data are always instances of RawData and ProcessedData."""
+        """Ensures that all data hierarchy fields are instances of their expected dataclasses.
+
+        Any of raw_data, processed_data, source_data, archived_data, or tracking_data that is not already an instance
+        of its expected class (RawData, ProcessedData, or TrackingData) is replaced with a default instance of that
+        class.
+        """
         if not isinstance(self.raw_data, RawData):
             self.raw_data = RawData()
         if not isinstance(self.processed_data, ProcessedData):
             self.processed_data = ProcessedData()
+        # Each check below resets only the field it tested, so a valid raw_data instance is never overwritten.
+        if not isinstance(self.source_data, RawData):
+            self.source_data = RawData()
+        if not isinstance(self.archived_data, ProcessedData):
+            self.archived_data = ProcessedData()
+        if not isinstance(self.tracking_data, TrackingData):
+            self.tracking_data = TrackingData()
     @classmethod
     def create(
         cls,
@@ -415,6 +467,22 @@ class SessionData(YamlConfig):
         processed_data = ProcessedData()
         processed_data.resolve_paths(root_directory_path=session_path.joinpath("processed_data"))
+        # Added in version 5.0.0. While source data is not used when the session is created (and is set to the same
+        # directory as raw_data), it is created here for completeness.
+        source_data = RawData()
+        source_data.resolve_paths(root_directory_path=session_path.joinpath("source_data"))
+        # Added in version 5.0.0. While archived data is not used when the session is created (and is set to the same
+        # directory as processed_data), it is created here for completeness.
+        archived_data = ProcessedData()
+        archived_data.resolve_paths(root_directory_path=session_path.joinpath("archived_data"))
+        # Similar to source_data, tracking data uses the same root as raw_data and is not used during data acquisition.
+        # Tracking data is used during data processing on the remote compute server(s) to ensure multiple pipelines
+        # can work with the session's data without collision.
+        tracking_data = TrackingData()
+        tracking_data.resolve_paths(root_directory_path=session_path.joinpath("tracking_data"))
         # Packages the sections generated above into a SessionData instance
         # noinspection PyArgumentList
         instance = SessionData(
@@ -424,6 +492,7 @@ class SessionData(YamlConfig):
             session_type=session_type,
             acquisition_system=acquisition_system.name,
             raw_data=raw_data,
+            source_data=source_data,
             processed_data=processed_data,
             experiment_name=experiment_name,
             python_version=python_version,
@@ -460,7 +529,6 @@ class SessionData(YamlConfig):
         cls,
         session_path: Path,
         processed_data_root: Path | None = None,
-        make_processed_data_directory: bool = False,
     ) -> "SessionData":
         """Loads the SessionData instance from the target session's session_data.yaml file.
@@ -478,55 +546,85 @@ class SessionData(YamlConfig):
                 provide the path to the root project directory (directory that stores all Sun lab projects) on that
                 drive. The method will automatically resolve the project/animal/session/processed_data hierarchy using
                 this root path. If raw and processed data are kept on the same drive, keep this set to None.
-            make_processed_data_directory: Determines whether this method should create the processed_data directory if
-                it does not exist.
         Returns:
             An initialized SessionData instance for the session whose data is stored at the provided path.
         Raises:
-            FileNotFoundError: If the 'session_data.yaml' file is not found under the session_path/raw_data/ subfolder.
+            FileNotFoundError: If multiple or no 'session_data.yaml' file instances are found under the input session
+                path directory.
         """
-        # To properly initialize the SessionData instance, the provided path should contain the raw_data directory
-        # with the session_data.yaml file.
-        session_data_path = session_path.joinpath("raw_data", "session_data.yaml")
-        if not session_data_path.exists():
+        # To properly initialize the SessionData instance, the provided path should contain a single session_data.yaml
+        # file at any hierarchy level.
+        session_data_files = [file for file in session_path.rglob("*session_data.yaml")]
+        if len(session_data_files) != 1:
             message = (
-                f"Unable to load the SessionData class for the target session: {session_path.stem}. No "
-                f"session_data.yaml file was found inside the raw_data folder of the session. This likely "
-                f"indicates that the session runtime was interrupted before recording any data, or that the "
-                f"session path does not point to a valid session."
+                f"Unable to load the SessionData class for the target session. Expected a single session_data.yaml "
+                f"file to be located under the directory tree specified by the input path: {session_path}. Instead, "
+                f"encountered {len(session_data_files)} candidate files. This indicates that the input path does not "
+                f"point to a valid session directory."
             )
             console.error(message=message, error=FileNotFoundError)
-        # Loads class data from the .yaml file
+        # If a single candidate is found (as expected), extracts it from the list and uses it to resolve the
+        # session data hierarchy.
+        session_data_path = session_data_files.pop()
+        # Loads class data from the .yaml file
         instance: SessionData = cls.from_yaml(file_path=session_data_path)  # type: ignore
         # The method assumes that the 'donor' .yaml file is always stored inside the raw_data directory of the session
-        # to be processed. Since the directory itself might have moved (between or even within the same PC) relative to
-        # where it was when the SessionData snapshot was generated, reconfigures the paths to all raw_data files using
-        # the root from above.
-        local_root = session_path.parents[2]
-        # RAW DATA
-        new_root = local_root.joinpath(instance.project_name, instance.animal_id, instance.session_name, "raw_data")
-        instance.raw_data.resolve_paths(root_directory_path=new_root)
+        # to be processed. In turn, that directory is expected to be found under the path root/project/animal/session.
+        # The code below uses this heuristic to discover the raw data root based on the session data file path.
+        local_root = session_data_path.parents[4]  # Raw data root session directory
         # Unless a different root is provided for processed data, it uses the same root as raw_data.
         if processed_data_root is None:
             processed_data_root = local_root
-        # Regenerates the processed_data path depending on the root resolution above
+        # RAW DATA
+        instance.raw_data.resolve_paths(
+            root_directory_path=local_root.joinpath(
+                instance.project_name, instance.animal_id, instance.session_name, "raw_data"
+            )
+        )
+        # PROCESSED DATA
         instance.processed_data.resolve_paths(
             root_directory_path=processed_data_root.joinpath(
                 instance.project_name, instance.animal_id, instance.session_name, "processed_data"
             )
         )
-        # Generates processed data directories if requested and necessary
-        if make_processed_data_directory:
-            instance.processed_data.make_directories()
+        # SOURCE DATA
+        instance.source_data.resolve_paths(
+            root_directory_path=processed_data_root.joinpath(
+                instance.project_name, instance.animal_id, instance.session_name, "source_data"
+            )
+        )
+        # Note, since source data is populated as part of the 'preparation' runtime, does not make the directories.
+        # ARCHIVED DATA
+        instance.archived_data.resolve_paths(
+            root_directory_path=local_root.joinpath(
+                instance.project_name, instance.animal_id, instance.session_name, "archived_data"
+            )
+        )
+        # Similar to source_data, archived data is populated as part of the 'archiving' pipeline, so directories for
+        # this data are not resolved.
+        # If there is no archived processed data, ensures that processed data hierarchy exists.
+        if not instance.archived_data.processed_data_path.exists():
+            instance.processed_data.make_directories()  # Ensures processed data directories exist
+        # TRACKING DATA
+        instance.tracking_data.resolve_paths(
+            root_directory_path=local_root.joinpath(
+                instance.project_name, instance.animal_id, instance.session_name, "tracking_data"
+            )
+        )
+        instance.tracking_data.make_directories()  # Ensures tracking data directories exist
         # Returns the initialized SessionData instance to caller
         return instance
@@ -557,6 +655,9 @@ class SessionData(YamlConfig):
         # prevents the SessionData instance from being loaded from the disk.
         origin.raw_data = None  # type: ignore
         origin.processed_data = None  # type: ignore
+        origin.source_data = None  # type: ignore
+        origin.archived_data = None  # type: ignore
+        origin.tracking_data = None  # type: ignore
         # Converts StringEnum instances to strings
         origin.session_type = str(origin.session_type)
@@ -567,310 +668,146 @@ class SessionData(YamlConfig):
 @dataclass()
-class ProcessingTracker(YamlConfig):
-    """Wraps the .yaml file that tracks the state of a data processing runtime and provides tools for communicating the
-    state between multiple processes in a thread-safe manner.
-    Primarily, this tracker class is used by all remote data processing pipelines in the lab to prevent race conditions
-    and make it impossible to run multiple processing runtimes at the same time. It is also used to evaluate the status
-    (success / failure) of jobs running on remote compute servers.
-    Note:
-        In library version 4.0.0 the processing trackers have been refactored to work similar to 'lock' files. That is,
-        when a runtime is started, the tracker is switched into the 'running' (locked) state until it is unlocked,
-        aborted, or encounters an error. When the tracker is locked, only the same manager process as the one that
-        locked the tracker is allowed to work with session data. This feature allows executing complex processing
-        pipelines that use multiple concurrent and / or sequential processing jobs on the remote server.
-        This instance frequently refers to a 'manager process' in method documentation. A 'manager process' is the
-        highest-level process that manages the runtime. When the runtime is executed on remote compute servers, the
-        manager process is typically the process running on the non-server machine (user PC) that executes the remote
-        processing job on the compute server (via SSH or similar protocol). The worker process(es) that run the
-        processing job(s) on the remote compute servers are NOT considered manager processes.
+class SessionLock(YamlConfig):
+    """Provides thread-safe session locking to ensure exclusive access during data processing.
+    This class manages a lock file that tracks which manager process currently has exclusive access to a session's data.
+    It prevents race conditions when multiple manager processes attempt to modify session data simultaneously.
+    The lock is identified by a manager process ID, allowing distributed processing across multiple jobs while
+    maintaining data integrity.
     """
     file_path: Path
-    """Stores the path to the .yaml file used to cache the tracker data on disk. The class instance functions as a
-    wrapper around the data stored inside the specified .yaml file."""
-    _complete: bool = False
-    """Tracks whether the processing runtime managed by this tracker has finished successfully."""
-    _encountered_error: bool = False
-    """Tracks whether the processing runtime managed by this tracker has encountered an error and has finished
-    unsuccessfully."""
-    _running: bool = False
-    """Tracks whether the processing runtime managed by this tracker is currently running."""
+    """Stores the absolute path to the .yaml file that stores the lock state on disk."""
     _manager_id: int = -1
-    """Stores the xxHash3-64 hash value that represents the unique identifier of the manager process that started the
-    runtime. The manager process is typically running on a remote control machine (computer) and is used to
-    support processing runtimes that are distributed over multiple separate batch jobs on the compute server. This
-    ID should be generated using the 'generate_manager_id()' function exposed by this library."""
+    """Stores the unique identifier of the manager process that holds the lock. A value of -1 indicates no lock."""
     _lock_path: str = field(init=False)
-    """Stores the path to the .lock file used to ensure that only a single process can simultaneously access the data
-    stored inside the tracker file."""
+    """Stores the absolute path to the .lock file ensuring thread-safe access to the lock state."""
     def __post_init__(self) -> None:
-        # Generates the .lock file path for the target tracker .yaml file.
+        """Initializes the lock file path based on the .yaml file path."""
         if self.file_path is not None:
             self._lock_path = str(self.file_path.with_suffix(self.file_path.suffix + ".lock"))
         else:
             self._lock_path = ""
     def _load_state(self) -> None:
-        """Reads the current processing state from the wrapped .YAML file."""
+        """Loads the current lock state from the .yaml file."""
         if self.file_path.exists():
-            # Loads the data for the state values but does not replace the file path or lock attributes.
-            instance: ProcessingTracker = self.from_yaml(self.file_path)  # type: ignore
-            self._complete = copy.copy(instance._complete)
-            self._encountered_error = copy.copy(instance._encountered_error)
-            self._running = copy.copy(instance._running)
+            instance: SessionLock = self.from_yaml(self.file_path)  # type: ignore
             self._manager_id = copy.copy(instance._manager_id)
         else:
-            # Otherwise, if the tracker file does not exist, generates a new .yaml file using default instance values
-            # and saves it to disk using the specified tracker file path.
+            # Creates a new lock file with the default state (unlocked)
             self._save_state()
     def _save_state(self) -> None:
-        """Saves the current processing state stored inside instance attributes to the specified .YAML file."""
-        # Resets the _lock_path and file_path to None before dumping the data to .YAML to avoid issues with loading it
-        # back.
+        """Saves the current lock state to the .yaml file."""
+        # Creates a copy without file paths for clean serialization
         original = copy.deepcopy(self)
         original.file_path = None  # type: ignore
         original._lock_path = None  # type: ignore
         original.to_yaml(file_path=self.file_path)
-    def start(self, manager_id: int) -> None:
-        """Configures the tracker file to indicate that a manager process is currently executing the tracked processing
-        runtime.
-        Calling this method effectively 'locks' the tracked session and processing runtime combination to only be
-        accessible from the manager process that calls this method. Calling this method for an already running runtime
-        managed by the same process does not have any effect, so it is safe to call this method at the beginning of
-        each processing job that makes up the runtime.
+    def acquire(self, manager_id: int) -> None:
+        """Acquires the session lock for exclusive access.
         Args:
-            manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to start the runtime
-                tracked by this tracker file.
+            manager_id: The unique identifier of the manager process requesting the lock.
         Raises:
-            TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
+            TimeoutError: If the .lock file cannot be acquired for a long period of time due to being held by another
+                process.
+            RuntimeError: If the lock is held by another process and forcing lock acquisition is disabled.
         """
-        # Acquires the lock
         lock = FileLock(self._lock_path)
         with lock.acquire(timeout=10.0):
-            # Loads tracker state from the .yaml file
             self._load_state()
-            # If the runtime is already running from a different process, aborts with an error.
-            if self._running and manager_id != self._manager_id:
+            # Checks if the session is already locked by another process
+            if self._manager_id != -1 and self._manager_id != manager_id:
                 message = (
-                    f"Unable to start the processing runtime from the manager process with id {manager_id}. The "
-                    f"{self.file_path.name} tracker file indicates that the manager process with id {self._manager_id} "
-                    f"is currently executing the tracked runtime. Only a single manager process is allowed to execute "
-                    f"the runtime at the same time."
+                    f"Cannot acquire the session lock for manager process {manager_id}. The {self.file_path.name} "
+                    f"session lock file indicates The lock is currently held by the manager process "
+                    f"{self._manager_id}. Call the command that produced this error with the '--reset_lock' flag "
+                    f"to override this safety feature or wait for the natural lock release."
                 )
                 console.error(message=message, error=RuntimeError)
-                raise RuntimeError(message)  # Fallback to appease mypy, should not be reachable
+                raise RuntimeError(message)
-            # Otherwise, if the runtime is already running for the current manager process, returns without modifying
-            # the tracker data.
-            elif self._running and manager_id == self._manager_id:
-                return
-            # Otherwise, locks the runtime for the current manager process and updates the cached tracker data
-            self._running = True
+            # The lock is free or already owned by this manager. If the lock is free, locks the session for the current
+            # manager. If it is already owned by this manager, it does nothing.
             self._manager_id = manager_id
-            self._complete = False
-            self._encountered_error = False
             self._save_state()
-    def error(self, manager_id: int) -> None:
-        """Configures the tracker file to indicate that the tracked processing runtime encountered an error and failed
-        to complete.
-        This method fulfills two main purposes. First, it 'unlocks' the runtime, allowing other manager processes to
-        interface with the tracked runtime. Second, it updates the tracker file to reflect that the runtime was
-        interrupted due to an error, which is used by the manager processes to detect and handle processing failures.
+    def release(self, manager_id: int) -> None:
+        """Releases the session lock.
         Args:
-            manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to report that the
-                runtime tracked by this tracker file has encountered an error.
+            manager_id: The unique identifier of the manager process releasing the lock.
         Raises:
-            TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
+            TimeoutError: If the .lock file cannot be acquired for a long period of time due to being held by another
+                process.
+            RuntimeError: If the lock is held by another process.
         """
         lock = FileLock(self._lock_path)
         with lock.acquire(timeout=10.0):
-            # Loads tracker state from the .yaml file
             self._load_state()
-            # If the runtime is not running, returns without doing anything
-            if not self._running:
-                return
-            # Ensures that only the active manager process can report runtime errors using the tracker file
-            if manager_id != self._manager_id:
+            if self._manager_id != manager_id:
                 message = (
-                    f"Unable to report that the processing runtime has encountered an error from the manager process "
-                    f"with id {manager_id}. The {self.file_path.name} tracker file indicates that the runtime is "
-                    f"managed by the process with id {self._manager_id}, preventing other processes from interfacing "
-                    f"with the runtime."
+                    f"Unable to release the session lock from the manager with id {manager_id}. The "
+                    f"{self.file_path.name} session lock file indicates that the lock is held by the process with "
+                    f"id {self._manager_id}, preventing other processes from interfacing with the session lock."
                 )
                 console.error(message=message, error=RuntimeError)
                 raise RuntimeError(message)  # Fallback to appease mypy, should not be reachable
-            # Indicates that the runtime aborted with an error
-            self._running = False
+            # Releases the lock
             self._manager_id = -1
-            self._complete = False
-            self._encountered_error = True
             self._save_state()
-    def stop(self, manager_id: int) -> None:
-        """Configures the tracker file to indicate that the tracked processing runtime has been completed successfully.
-        This method 'unlocks' the runtime, allowing other manager processes to interface with the tracked runtime. It
-        also configures the tracker file to indicate that the runtime has been completed successfully, which is used
-        by the manager processes to detect and handle processing completion.
+    def force_release(self) -> None:
+        """Forcibly releases the lock regardless of ownership.
-        Args:
-            manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to report that the
-                runtime tracked by this tracker file has been completed successfully.
+        This method should only be used for emergency recovery of deadlocked sessions. It can be called by any process
+        to unlock the session whose lock is managed by this instance.
         Raises:
-            TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
+            TimeoutError: If the .lock file cannot be acquired for a long period of time due to being held by another
+                process.
         """
         lock = FileLock(self._lock_path)
         with lock.acquire(timeout=10.0):
-            # Loads tracker state from the .yaml file
-            self._load_state()
-            # If the runtime is not running, does not do anything
-            if not self._running:
-                return
-            # Ensures that only the active manager process can report runtime completion using the tracker file
-            if manager_id != self._manager_id:
-                message = (
-                    f"Unable to report that the processing runtime has completed successfully from the manager process "
-                    f"with id {manager_id}. The {self.file_path.name} tracker file indicates that the runtime is "
-                    f"managed by the process with id {self._manager_id}, preventing other processes from interfacing "
-                    f"with the runtime."
-                )
-                console.error(message=message, error=RuntimeError)
-                raise RuntimeError(message)  # Fallback to appease mypy, should not be reachable
-            # Otherwise, marks the runtime as complete (stopped)
-            self._running = False
             self._manager_id = -1
-            self._complete = True
-            self._encountered_error = False
             self._save_state()
-    def abort(self) -> None:
-        """Resets the runtime tracker file to the default state.
+    @property
+    def is_locked(self) -> bool:
+        """Returns True if the session is currently locked by any process, False otherwise.
-        This method can be used to reset the runtime tracker file, regardless of the current runtime state. Unlike other
-        instance methods, this method can be called from any manager process, even if the runtime is already locked by
-        another process. This method is only intended to be used in the case of emergency to 'unlock' a deadlocked
-        runtime.
+        Raises:
+            TimeoutError: If the .lock file cannot be acquired for a long period of time due to being held by another
+                process.
         """
         lock = FileLock(self._lock_path)
         with lock.acquire(timeout=10.0):
-            # Loads tracker state from the .yaml file
-            self._load_state()
-            # Resets the tracker file to the default state. Note, does not indicate that the runtime is complete nor
-            # that it has encountered an error.
-            self._running = False
-            self._manager_id = -1
-            self._complete = False
-            self._encountered_error = False
-            self._save_state()
-    @property
-    def is_complete(self) -> bool:
-        """Returns True if the tracker wrapped by the instance indicates that the processing runtime has been completed
-        successfully and that the runtime is not currently ongoing."""
-        lock = FileLock(self._lock_path)
-        with lock.acquire(timeout=10.0):
-            # Loads tracker state from the .yaml file
             self._load_state()
-            return self._complete
+            return self._manager_id != -1
     @property
-    def encountered_error(self) -> bool:
-        """Returns True if the tracker wrapped by the instance indicates that the processing runtime has aborted due
-        to encountering an error."""
-        lock = FileLock(self._lock_path)
-        with lock.acquire(timeout=10.0):
-            # Loads tracker state from the .yaml file
-            self._load_state()
-            return self._encountered_error
+    def owner(self) -> int | None:
+        """Returns the unique identifier of the manager process that holds the lock if the session is locked or None if
+        the session is unlocked.
-    @property
-    def is_running(self) -> bool:
-        """Returns True if the tracker wrapped by the instance indicates that the processing runtime is currently
-        ongoing."""
+        Raises:
+            TimeoutError: If the .lock file cannot be acquired for a long period of time due to being held by another
+                process.
+        """
         lock = FileLock(self._lock_path)
         with lock.acquire(timeout=10.0):
-            # Loads tracker state from the .yaml file
             self._load_state()
-            return self._running
-def get_processing_tracker(root: Path, file_name: TrackerFileNames | str) -> ProcessingTracker:
-    """Initializes and returns the ProcessingTracker instance that manages the data stored inside the target processing
-    tracker file.
-    This function uses the input root path and tracker file name to first resolve the absolute path to the .yaml data
-    cache of the target processing tracker file and then wrap the file into a ProcessingTracker instance. All Sun lab
-    libraries that use ProcessingTracker instances use this function to access the necessary trackers.
-    Notes:
-        If the target file does not exist, this function will create the file as part of the ProcessingTracker
-        initialization.
-        This function also generates the corresponding .lock file to ensure that the data inside the processing tracker
-        is accessed by a single process at a time.
-    Args:
-        file_name: The name of the target processing tracker file. Has to be one of the names from the TrackerFileNames
-            enumeration.
-        root: The absolute path to the directory where the target file is stored or should be created.
-    Returns:
-        The initialized ProcessingTracker instance that manages the data stored in the target file.
-    """
-    # Prevents using the function for unsupported tracker file names.
-    supported_files = tuple(TrackerFileNames)
-    if file_name not in supported_files:
-        message = (
-            f"Unable to construct the path to the tracker file {file_name}. The input name is not one of the supported"
-            f"names. Use one of the supported options provided by the TrackerFileNames enumeration."
-        )
-        console.error(message=message, error=ValueError)
-    # Constructs and returns the absolute path to the requested tracker file.
-    tracker_path = root.joinpath(file_name)
-    return ProcessingTracker(file_path=tracker_path)
-def generate_manager_id() -> int:
-    """Generates and returns a unique integer identifier that can be used to identify the manager process that calls
-    this function.
-    The identifier is generated based on the current timestamp, accurate to microseconds, and a random number between 1
-    and 9999999999999. This ensures that the identifier is unique for each function call. The generated identifier
-    string is converted to a unique integer value using the xxHash-64 algorithm before it is returned to the caller.
-    Notes:
-        This function should be used to generate manager process identifiers for working with ProcessingTracker
-        instances from sl-shared-assets version 4.0.0 and above.
-    """
-    timestamp = get_timestamp()
-    random_number = randint(1, 9999999999999)
-    manager_id = f"{timestamp}_{random_number}"
-    id_hash = xxh3_64()
-    id_hash.update(manager_id)
-    return id_hash.intdigest()
+            return self._manager_id if self._manager_id != -1 else None

sl-shared-assets 4.0.1__py3-none-any.whl → 5.0.0__py3-none-any.whl

Potentially problematic release.

sl-shared-assets 4.0.1__py3-none-any.whl → 5.0.0__py3-none-any.whl