sl-shared-assets 3.1.3__py3-none-any.whl → 4.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic. Click here for more details.

@@ -29,6 +29,20 @@ class SessionTypes(StrEnum):
29
29
  MESOSCOPE_EXPERIMENT = "mesoscope experiment"
30
30
  WINDOW_CHECKING = "window checking"
31
31
 
32
+ class TrackerFileNames(StrEnum):
33
+ """Defines a set of processing tracker .yaml files supported by various Sun lab data preprocessing, processing, and
34
+ dataset formation pipelines.
35
+
36
+ This enumeration standardizes the names for all processing tracker files used in the lab. It is designed to be used
37
+ via the get_processing_tracker() function to generate ProcessingTracker instances.
38
+ """
39
+
40
+ BEHAVIOR = "behavior_processing_tracker.yaml"
41
+ SUITE2P = "suite2p_processing_tracker.yaml"
42
+ DATASET = "dataset_formation_tracker.yaml"
43
+ VIDEO = "video_processing_tracker.yaml"
44
+ INTEGRITY = "integrity_verification_tracker.yaml"
45
+
32
46
  @dataclass()
33
47
  class RawData:
34
48
  """Stores the paths to the directories and files that make up the 'raw_data' session-specific directory.
@@ -60,7 +74,6 @@ class RawData:
60
74
  telomere_path: Path = ...
61
75
  ubiquitin_path: Path = ...
62
76
  nk_path: Path = ...
63
- integrity_verification_tracker_path: Path = ...
64
77
  def resolve_paths(self, root_directory_path: Path) -> None:
65
78
  """Resolves all paths managed by the class instance based on the input root directory path.
66
79
 
@@ -91,9 +104,6 @@ class ProcessedData:
91
104
  camera_data_path: Path = ...
92
105
  mesoscope_data_path: Path = ...
93
106
  behavior_data_path: Path = ...
94
- suite2p_processing_tracker_path: Path = ...
95
- behavior_processing_tracker_path: Path = ...
96
- video_processing_tracker_path: Path = ...
97
107
  p53_path: Path = ...
98
108
  def resolve_paths(self, root_directory_path: Path) -> None:
99
109
  """Resolves all paths managed by the class instance based on the input root directory path.
@@ -133,8 +143,8 @@ class SessionData(YamlConfig):
133
143
  animal_id: str
134
144
  session_name: str
135
145
  session_type: str | SessionTypes
136
- acquisition_system: str | AcquisitionSystems
137
- experiment_name: str | None
146
+ acquisition_system: str | AcquisitionSystems = ...
147
+ experiment_name: str | None = ...
138
148
  python_version: str = ...
139
149
  sl_experiment_version: str = ...
140
150
  raw_data: RawData = field(default_factory=Incomplete)
@@ -223,7 +233,7 @@ class SessionData(YamlConfig):
223
233
  that did not fully initialize during runtime. This service method is designed to be called by the sl-experiment
224
234
  library classes to remove the 'nk.bin' marker when it is safe to do so. It should not be called by end-users.
225
235
  """
226
- def _save(self) -> None:
236
+ def save(self) -> None:
227
237
  """Saves the instance data to the 'raw_data' directory of the managed session as a 'session_data.yaml' file.
228
238
 
229
239
  This is used to save the data stored in the instance to disk so that it can be reused during further stages of
@@ -237,66 +247,133 @@ class ProcessingTracker(YamlConfig):
237
247
  state between multiple processes in a thread-safe manner.
238
248
 
239
249
  Primarily, this tracker class is used by all remote data processing pipelines in the lab to prevent race conditions
240
- and make it impossible to run multiple processing runtimes at the same time.
250
+ and make it impossible to run multiple processing runtimes at the same time. It is also used to evaluate the status
251
+ (success / failure) of jobs running on remote compute servers.
252
+
253
+ Note:
254
+ In library version 4.0.0 the processing trackers have been refactored to work similarly to 'lock' files. That is,
255
+ when a runtime is started, the tracker is switched into the 'running' (locked) state until it is unlocked,
256
+ aborted, or encounters an error. When the tracker is locked, only the same manager process as the one that
257
+ locked the tracker is allowed to work with session data. This feature allows executing complex processing
258
+ pipelines that use multiple concurrent and / or sequential processing jobs on the remote server.
259
+
260
+ This instance frequently refers to a 'manager process' in method documentation. A 'manager process' is the
261
+ highest-level process that manages the runtime. When the runtime is executed on remote compute servers, the
262
+ manager process is typically the process running on the non-server machine (user PC) that executes the remote
263
+ processing job on the compute server (via SSH or similar protocol). The worker process(es) that run the
264
+ processing job(s) on the remote compute servers are NOT considered manager processes.
241
265
  """
242
266
 
243
267
  file_path: Path
244
- _is_complete: bool = ...
268
+ _complete: bool = ...
245
269
  _encountered_error: bool = ...
246
- _is_running: bool = ...
270
+ _running: bool = ...
271
+ _manager_id: int = ...
247
272
  _lock_path: str = field(init=False)
248
- _started_runtime: bool = ...
249
273
  def __post_init__(self) -> None: ...
250
- def __del__(self) -> None:
251
- """If the instance as used to start a runtime, ensures that the instance properly marks the runtime as completed
252
- or erred before being garbage-collected.
253
-
254
- This is a security mechanism to prevent deadlocking the processed session and pipeline for future runtimes.
255
- """
256
274
  def _load_state(self) -> None:
257
275
  """Reads the current processing state from the wrapped .YAML file."""
258
276
  def _save_state(self) -> None:
259
277
  """Saves the current processing state stored inside instance attributes to the specified .YAML file."""
260
- def start(self) -> None:
261
- """Configures the tracker file to indicate that the tracked processing runtime is currently running.
278
+ def start(self, manager_id: int) -> None:
279
+ """Configures the tracker file to indicate that a manager process is currently executing the tracked processing
280
+ runtime.
281
+
282
+ Calling this method effectively 'locks' the tracked session and processing runtime combination to only be
283
+ accessible from the manager process that calls this method. Calling this method for an already running runtime
284
+ managed by the same process does not have any effect, so it is safe to call this method at the beginning of
285
+ each processing job that makes up the runtime.
262
286
 
263
- All further attempts to start the same processing runtime for the same session's data will automatically abort
264
- with an error.
287
+ Args:
288
+ manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to start the runtime
289
+ tracked by this tracker file.
265
290
 
266
291
  Raises:
267
292
  TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
268
293
  """
269
- def error(self) -> None:
294
+ def error(self, manager_id: int) -> None:
270
295
  """Configures the tracker file to indicate that the tracked processing runtime encountered an error and failed
271
296
  to complete.
272
297
 
273
- This method will only work for an active runtime. When called for an active runtime, it expects the runtime to
274
- be aborted with an error after the method returns. It configures the target tracker to allow other processes
275
- to restart the runtime at any point after this method returns, so it is UNSAFE to do any further processing
276
- from the process that calls this method.
298
+ This method fulfills two main purposes. First, it 'unlocks' the runtime, allowing other manager processes to
299
+ interface with the tracked runtime. Second, it updates the tracker file to reflect that the runtime was
300
+ interrupted due to an error, which is used by the manager processes to detect and handle processing failures.
301
+
302
+ Args:
303
+ manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to report that the
304
+ runtime tracked by this tracker file has encountered an error.
277
305
 
278
306
  Raises:
279
307
  TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
280
308
  """
281
- def stop(self) -> None:
309
+ def stop(self, manager_id: int) -> None:
282
310
  """Configures the tracker file to indicate that the tracked processing runtime has been completed successfully.
283
311
 
284
- After this method returns, it is UNSAFE to do any further processing from the process that calls this method.
285
- Any process that calls the 'start' method of this class is expected to also call this method or 'error' method
286
- at the end of the runtime.
312
+ This method 'unlocks' the runtime, allowing other manager processes to interface with the tracked runtime. It
313
+ also configures the tracker file to indicate that the runtime has been completed successfully, which is used
314
+ by the manager processes to detect and handle processing completion.
315
+
316
+ Args:
317
+ manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to report that the
318
+ runtime tracked by this tracker file has been completed successfully.
287
319
 
288
320
  Raises:
289
321
  TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
290
322
  """
323
+ def abort(self) -> None:
324
+ """Resets the runtime tracker file to the default state.
325
+
326
+ This method can be used to reset the runtime tracker file, regardless of the current runtime state. Unlike other
327
+ instance methods, this method can be called from any manager process, even if the runtime is already locked by
328
+ another process. This method is only intended to be used in the case of emergency to 'unlock' a deadlocked
329
+ runtime.
330
+ """
291
331
  @property
292
332
  def is_complete(self) -> bool:
293
333
  """Returns True if the tracker wrapped by the instance indicates that the processing runtime has been completed
294
- successfully at least once and that there is no ongoing processing that uses the target session."""
334
+ successfully and that the runtime is not currently ongoing."""
295
335
  @property
296
336
  def encountered_error(self) -> bool:
297
- """Returns True if the tracker wrapped by the instance indicates that the processing runtime for the target
298
- session has aborted due to encountering an error."""
337
+ """Returns True if the tracker wrapped by the instance indicates that the processing runtime has aborted due
338
+ to encountering an error."""
299
339
  @property
300
340
  def is_running(self) -> bool:
301
341
  """Returns True if the tracker wrapped by the instance indicates that the processing runtime is currently
302
- running for the target session."""
342
+ ongoing."""
343
+
344
+ def get_processing_tracker(root: Path, file_name: TrackerFileNames | str) -> ProcessingTracker:
345
+ """Initializes and returns the ProcessingTracker instance that manages the data stored inside the target processing
346
+ tracker file.
347
+
348
+ This function uses the input root path and tracker file name to first resolve the absolute path to the .yaml data
349
+ cache of the target processing tracker file and then wrap the file into a ProcessingTracker instance. All Sun lab
350
+ libraries that use ProcessingTracker instances use this function to access the necessary trackers.
351
+
352
+ Notes:
353
+ If the target file does not exist, this function will create the file as part of the ProcessingTracker
354
+ initialization.
355
+
356
+ This function also generates the corresponding .lock file to ensure that the data inside the processing tracker
357
+ is accessed by a single process at a time.
358
+
359
+ Args:
360
+ file_name: The name of the target processing tracker file. Has to be one of the names from the TrackerFileNames
361
+ enumeration.
362
+ root: The absolute path to the directory where the target file is stored or should be created.
363
+
364
+ Returns:
365
+ The initialized ProcessingTracker instance that manages the data stored in the target file.
366
+ """
367
+
368
+ def generate_manager_id() -> int:
369
+ """Generates and returns a unique integer identifier that can be used to identify the manager process that calls
370
+ this function.
371
+
372
+ The identifier is generated based on the current timestamp, accurate to microseconds, and a random number between 1
373
+ and 9999999999999. This ensures that the identifier is unique for each function call. The generated identifier
374
+ string is converted to a unique integer value using the xxHash-64 algorithm before it is returned to the caller.
375
+
376
+ Notes:
377
+ This function should be used to generate manager process identifiers for working with ProcessingTracker
378
+ instances from sl-shared-assets version 4.0.0 and above.
379
+ """
@@ -56,7 +56,7 @@ class ProcedureData:
56
56
 
57
57
  @dataclass
58
58
  class ImplantData:
59
- """Stores the information about a single implantation performed during the surgical intervention.
59
+ """Stores the information about a single implantation procedure performed during the surgical intervention.
60
60
 
61
61
  Multiple ImplantData instances are used at the same time if the surgery involved multiple implants.
62
62
  """
@@ -65,7 +65,7 @@ class ImplantData:
65
65
  """The descriptive name of the implant."""
66
66
  implant_target: str
67
67
  """The name of the brain region or cranium section targeted by the implant."""
68
- implant_code: int
68
+ implant_code: str
69
69
  """The manufacturer code or internal reference code for the implant. This code is used to identify the implant in
70
70
  additional datasheets and lab ordering documents."""
71
71
  implant_ap_coordinate_mm: float
@@ -89,7 +89,7 @@ class InjectionData:
89
89
  """The name of the brain region targeted by the injection."""
90
90
  injection_volume_nl: float
91
91
  """The volume of substance, in nanoliters, delivered during the injection."""
92
- injection_code: int
92
+ injection_code: str
93
93
  """The manufacturer code or internal reference code for the injected substance. This code is used to identify the
94
94
  substance in additional datasheets and lab ordering documents."""
95
95
  injection_ap_coordinate_mm: float
@@ -108,22 +108,22 @@ class DrugData:
108
108
 
109
109
  lactated_ringers_solution_volume_ml: float
110
110
  """Stores the volume of Lactated Ringer's Solution (LRS) administered during surgery, in ml."""
111
- lactated_ringers_solution_code: int
111
+ lactated_ringers_solution_code: str
112
112
  """Stores the manufacturer code or internal reference code for Lactated Ringer's Solution (LRS). This code is used
113
113
  to identify the LRS batch in additional datasheets and lab ordering documents."""
114
114
  ketoprofen_volume_ml: float
115
115
  """Stores the volume of ketoprofen diluted with saline administered during surgery, in ml."""
116
- ketoprofen_code: int
116
+ ketoprofen_code: str
117
117
  """Stores the manufacturer code or internal reference code for ketoprofen. This code is used to identify the
118
118
  ketoprofen batch in additional datasheets and lab ordering documents."""
119
119
  buprenorphine_volume_ml: float
120
120
  """Stores the volume of buprenorphine diluted with saline administered during surgery, in ml."""
121
- buprenorphine_code: int
121
+ buprenorphine_code: str
122
122
  """Stores the manufacturer code or internal reference code for buprenorphine. This code is used to identify the
123
123
  buprenorphine batch in additional datasheets and lab ordering documents."""
124
124
  dexamethasone_volume_ml: float
125
125
  """Stores the volume of dexamethasone diluted with saline administered during surgery, in ml."""
126
- dexamethasone_code: int
126
+ dexamethasone_code: str
127
127
  """Stores the manufacturer code or internal reference code for dexamethasone. This code is used to identify the
128
128
  dexamethasone batch in additional datasheets and lab ordering documents."""
129
129
 
@@ -30,14 +30,14 @@ class ProcedureData:
30
30
 
31
31
  @dataclass
32
32
  class ImplantData:
33
- """Stores the information about a single implantation performed during the surgical intervention.
33
+ """Stores the information about a single implantation procedure performed during the surgical intervention.
34
34
 
35
35
  Multiple ImplantData instances are used at the same time if the surgery involved multiple implants.
36
36
  """
37
37
 
38
38
  implant: str
39
39
  implant_target: str
40
- implant_code: int
40
+ implant_code: str
41
41
  implant_ap_coordinate_mm: float
42
42
  implant_ml_coordinate_mm: float
43
43
  implant_dv_coordinate_mm: float
@@ -52,7 +52,7 @@ class InjectionData:
52
52
  injection: str
53
53
  injection_target: str
54
54
  injection_volume_nl: float
55
- injection_code: int
55
+ injection_code: str
56
56
  injection_ap_coordinate_mm: float
57
57
  injection_ml_coordinate_mm: float
58
58
  injection_dv_coordinate_mm: float
@@ -64,13 +64,13 @@ class DrugData:
64
64
  """
65
65
 
66
66
  lactated_ringers_solution_volume_ml: float
67
- lactated_ringers_solution_code: int
67
+ lactated_ringers_solution_code: str
68
68
  ketoprofen_volume_ml: float
69
- ketoprofen_code: int
69
+ ketoprofen_code: str
70
70
  buprenorphine_volume_ml: float
71
- buprenorphine_code: int
71
+ buprenorphine_code: str
72
72
  dexamethasone_volume_ml: float
73
- dexamethasone_code: int
73
+ dexamethasone_code: str
74
74
 
75
75
  @dataclass
76
76
  class SurgeryData(YamlConfig):
@@ -393,8 +393,8 @@ class Server:
393
393
  # The file doesn't exist yet or job initialization failed
394
394
  if self.job_complete(job):
395
395
  message = (
396
- f"Remote jupyter server job {job.job_name} with id {job.job_id} encountered a startup and "
397
- f"was terminated prematurely."
396
+ f"Remote jupyter server job {job.job_name} with id {job.job_id} encountered a startup "
397
+ f"error and was terminated prematurely."
398
398
  )
399
399
  console.error(message, RuntimeError)
400
400
 
@@ -10,6 +10,8 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
10
10
  from tqdm import tqdm
11
11
  import xxhash
12
12
 
13
+ from ..data_classes import TrackerFileNames
14
+
13
15
  # Defines a 'blacklist' set of files. Primarily, this list contains the service files that may change after the session
14
16
  # data has been acquired. Therefore, it does not make sense to include them in the checksum, as they do not reflect the
15
17
  # data that should remain permanently unchanged. Note, make sure all service files are added to this set!
@@ -19,16 +21,13 @@ _excluded_files = {
19
21
  "telomere.bin",
20
22
  "p53.bin",
21
23
  "nk.bin",
22
- "suite2p_processing_tracker.yaml",
23
- "dataset_formation_tracker.yaml",
24
- "video_processing_tracker.yaml",
25
- "integrity_verification_tracker.yaml",
26
- "suite2p_processing_tracker.yaml.lock",
27
- "behavior_processing_tracker.yaml.lock",
28
- "video_processing_tracker.yaml.lock",
29
- "integrity_verification_tracker.yaml.lock",
30
24
  }
31
25
 
26
+ # Extends the exclusion set to include all tracker .yaml files and their concurrent access .lock files.
27
+ for name in tuple(TrackerFileNames):
28
+ _excluded_files.add(name)
29
+ _excluded_files.add(f"{name}.lock")
30
+
32
31
 
33
32
  def _calculate_file_checksum(base_directory: Path, file_path: Path) -> tuple[str, bytes]:
34
33
  """Calculates xxHash3-128 checksum for a single file and its path relative to the base directory.
@@ -2,6 +2,8 @@ from pathlib import Path
2
2
 
3
3
  from _typeshed import Incomplete
4
4
 
5
+ from ..data_classes import TrackerFileNames as TrackerFileNames
6
+
5
7
  _excluded_files: Incomplete
6
8
 
7
9
  def _calculate_file_checksum(base_directory: Path, file_path: Path) -> tuple[str, bytes]: