PyPI - sl-shared-assets - Versions diffs - 1.0.0rc15__py3-none-any.whl → 1.0.0rc17__py3-none-any.whl - Mend

sl-shared-assets 1.0.0rc15py3-none-any.whl → 1.0.0rc17py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sl-shared-assets might be problematic. Click here for more details.

Files changed (26) hide show

sl_shared_assets/data_classes/session_data.py +27 -9
sl_shared_assets/suite2p/multi_day.py +119 -87
sl_shared_assets/suite2p/single_day.py +220 -136
{sl_shared_assets-1.0.0rc15.dist-info → sl_shared_assets-1.0.0rc17.dist-info}/METADATA +1 -1
sl_shared_assets-1.0.0rc17.dist-info/RECORD +23 -0
sl_shared_assets/__init__.pyi +0 -71
sl_shared_assets/cli.pyi +0 -28
sl_shared_assets/data_classes/__init__.pyi +0 -61
sl_shared_assets/data_classes/configuration_data.pyi +0 -37
sl_shared_assets/data_classes/runtime_data.pyi +0 -145
sl_shared_assets/data_classes/session_data.pyi +0 -527
sl_shared_assets/data_classes/surgery_data.pyi +0 -89
sl_shared_assets/server/__init__.pyi +0 -8
sl_shared_assets/server/job.pyi +0 -94
sl_shared_assets/server/server.pyi +0 -95
sl_shared_assets/suite2p/__init__.pyi +0 -4
sl_shared_assets/suite2p/multi_day.pyi +0 -99
sl_shared_assets/suite2p/single_day.pyi +0 -192
sl_shared_assets/tools/__init__.pyi +0 -5
sl_shared_assets/tools/ascension_tools.pyi +0 -68
sl_shared_assets/tools/packaging_tools.pyi +0 -52
sl_shared_assets/tools/transfer_tools.pyi +0 -53
sl_shared_assets-1.0.0rc15.dist-info/RECORD +0 -40
{sl_shared_assets-1.0.0rc15.dist-info → sl_shared_assets-1.0.0rc17.dist-info}/WHEEL +0 -0
{sl_shared_assets-1.0.0rc15.dist-info → sl_shared_assets-1.0.0rc17.dist-info}/entry_points.txt +0 -0
{sl_shared_assets-1.0.0rc15.dist-info → sl_shared_assets-1.0.0rc17.dist-info}/licenses/LICENSE +0 -0

sl_shared_assets/data_classes/session_data.py CHANGED Viewed

@@ -528,13 +528,13 @@ class ConfigurationData:
     project_configuration_path: Path = Path()
     """Stores the path to the project_configuration.yaml file. This file contains the snapshot of the configuration
     parameters for the session's project."""
-    suite2p_configuration_path: Path = Path()
-    """Stores the path to the suite2p_configuration.yaml file stored inside the project's 'configuration' directory on
-    the fast BioHPC server volume. This configuration file specifies the parameters for the 'single day' suite2p
-    registration pipeline, which is applied to each session that generates brain activity data."""
-    multiday_configuration_path: Path = Path()
-    """Stores the path to the multiday_configuration.yaml file stored inside the project's 'configuration' directory
-    on the fast BioHPC server volume. This configuration file specifies the parameters for the 'multiday'
+    single_day_s2p_configuration_path: Path = Path()
+    """Stores the path to the single_day_s2p_configuration.yaml file stored inside the project's 'configuration'
+    directory on the fast BioHPC server volume. This configuration file specifies the parameters for the 'single day'
+    suite2p registration pipeline, which is applied to each session that generates brain activity data."""
+    multi_day_s2p_configuration_path: Path = Path()
+    """Stores the path to the multi_day_s2p_configuration.yaml file stored inside the project's 'configuration'
+    directory on the fast BioHPC server volume. This configuration file specifies the parameters for the 'multiday'
     sl-suite2p-based registration pipelines used tot rack brain cells across multiple sessions."""
     def resolve_paths(self, root_directory_path: Path, experiment_name: str | None = None) -> None:
@@ -559,8 +559,8 @@ class ConfigurationData:
         else:
             self.experiment_configuration_path = self.configuration_path.joinpath(f"{experiment_name}.yaml")
         self.project_configuration_path = self.configuration_path.joinpath("project_configuration.yaml")
-        self.suite2p_configuration_path = self.configuration_path.joinpath("suite2p_configuration.yaml")
-        self.multiday_configuration_path = self.configuration_path.joinpath("multiday_configuration.yaml")
+        self.single_day_s2p_configuration_path = self.configuration_path.joinpath("single_day_s2p_configuration.yaml")
+        self.multi_day_s2p_configuration_path = self.configuration_path.joinpath("multi_day_s2p_configuration.yaml")
     def make_directories(self) -> None:
         """Ensures that all major subdirectories and the root directory exist."""
@@ -1162,6 +1162,7 @@ class SessionData(YamlConfig):
         # RAW DATA
         new_root = local_root.joinpath(instance.project_name, instance.animal_id, instance.session_name, "raw_data")
         instance.raw_data.resolve_paths(root_directory_path=new_root)
+        instance.raw_data.make_directories()
         # Uses the adjusted raw_data section to load the ProjectConfiguration instance. This is used below to resolve
         # all other SessionData sections, as it stores various required root directories.
@@ -1178,10 +1179,12 @@ class SessionData(YamlConfig):
             root_directory_path=new_root,
             experiment_name=instance.experiment_name,
         )
+        instance.configuration_data.make_directories()
         # DEEPLABCUT
         new_root = local_root.joinpath(instance.project_name, "deeplabcut")
         instance.deeplabcut_data.resolve_paths(root_directory_path=new_root)
+        instance.deeplabcut_data.make_directories()
         # Resolves the roots for all VRPC-specific sections that use the data from the ProjectConfiguration instance:
@@ -1239,6 +1242,7 @@ class SessionData(YamlConfig):
                 instance.project_name, instance.animal_id, instance.session_name, "processed_data"
             )
         )
+        instance.processed_data.make_directories()
         # Ensures that project configuration and session data classes are present in both raw_data and processed_data
         # directories. This ensures that all data of the session can always be traced to the parent project, animal,
@@ -1270,6 +1274,20 @@ class SessionData(YamlConfig):
         create() method runtime.
         """
+        origin = copy.deepcopy(self)
+        # Resets all path fields to null. These fields are not loaded from disk when the instance is loaded, so setting
+        # them to null has no negative consequences. Conversely, keeping these fields with Path objects prevents the
+        # SessionData instance from being loaded from disk.
+        origin.raw_data = None  # type: ignore
+        origin.processed_data = None  # type: ignore
+        origin.configuration_data = None  # type: ignore
+        origin.deeplabcut_data = None  # type: ignore
+        origin.vrpc_persistent_data = None  # type: ignore
+        origin.scanimagepc_persistent_data = None  # type: ignore
+        origin.mesoscope_data = None  # type: ignore
+        origin.destinations = None  # type: ignore
         # Saves instance data as a .YAML file
         self.to_yaml(file_path=self.raw_data.session_data_path)
         self.to_yaml(file_path=self.processed_data.session_data_path)

sl_shared_assets/suite2p/multi_day.py CHANGED Viewed

@@ -4,6 +4,9 @@ extends the original suite2p code to support tracking the same objects (cells) a
 from typing import Any
 from dataclasses import field, asdict, dataclass
+from pathlib import Path
+import numpy as np
+from ataraxis_base_utilities import ensure_directory_exists
 from ataraxis_data_structures import YamlConfig
@@ -12,54 +15,81 @@ from ataraxis_data_structures import YamlConfig
 class IO:
     """Stores parameters that control data input and output during various stages of the pipeline."""
-    sessions: list[str] = field(default_factory=list)
+    session_ids: list[str] = field(default_factory=list)
+    """Stores the list of session IDs to register across days. This field should have the same length and order as the
+    session_folders list. Primarily, session IDs are used in terminal printouts to identify processed sessions to human
+    operators."""
+    session_folders: list[str] = field(default_factory=list)
     """Specifies the list of sessions to register across days, as absolute paths to their /suite2p directories
-    e.g: root/project/animal/session/processed_data/suite2p. The suite2p directory is created as part of the
-    'single-day' suite2p runtime, assuming the default value of the 'save_folder' SingleDayS2PConfiguration class
+    e.g: root/session/processed_data/mesoscope_data/suite2p. The suite2p directory is created as part of the
+    single-session suite2p processing, assuming the default value of the 'save_folder' SingleDayS2PConfiguration class
     attribute was not modified. Note, each suite2p directory has to contain the 'combined' plane folder, which is
-    created if the 'combined' SingleDayS2PConfiguration class attribute is 'True'."""
+    created if the 'combined' SingleDayS2PConfiguration attribute is 'True'."""
+@dataclass()
+class Hardware:
+    """Stores parameters that control how the suite2p interacts with the hardware of the host-computer to accelerate
+    processing speed."""
+    parallelize_registration: bool = True
+    """Determines whether to parallelize certain multi-day registration pipeline steps. Running these steps in parallel
+    results in faster overall processing, but increases the RAM usage. Since multi-day processing does not automatically
+    parallelize operations to all cores, it is generally safe and recommended to always enable this option."""
+    registration_workers: int = -1
+    """The number of parallel workers (cores) to use when parallelizing multi-day registration. Setting this to a
+    negative value uses all available cores. Setting this to zero or one disables parallelization."""
+    parallelize_extraction: bool = False
+    """Determines whether to extract multi-day cell fluorescence from multiple sessions at the same time. Note,
+    fluorescence extraction already contains automatic parallelization and will use all available cores to a certain
+    extent. Extracting data for multiple sessions at the same time is still faster due to a more efficient core
+    utilization, but typically does not not scale well (peaks for 2-3 parallel sessions) and majorly increase the RAM
+    usage.
+    """
-    mesoscan: bool = True
-    """Indicates whether the processed session /suite2p folders contain registered Mesoscope frames."""
+    parallel_sessions: int = 3
+    """The number of sessions to process in-parallel when extracting multi-day fluorescence data. Since this
+    parallelization works on top of existing suite2p numba-parallelization, it will use all available cores regardless
+    of the number of parallelized sessions. Instead, this parameter can be tuned to control the total RAM usage and
+    the extent of overall core utilization. Setting this to a value at or below one will disable session
+    parallelization."""
 @dataclass()
 class CellDetection:
     """Stores parameters for selecting single-day-registered cells (ROIs) to be tracked across multiple sessions (days).
-    To maximize the tracking pipeline reliability, it is beneficial to pre-filter the cells whose identity (as cells)
-    is not certain or that may be hard to track across sessions.
     """
     probability_threshold: float = 0.85
-    """The minimum required probability score assigned to the cell (ROI) by the suite2p classifier. Cells with a lower
-    classifier score are excluded from processing."""
+    """The minimum required probability score assigned to the cell (ROI) by the single-day suite2p classifier. Cells
+    with a lower classifier score are excluded from multi-day processing."""
     maximum_size: int = 1000
     """The maximum allowed cell (ROI) size, in pixels. Cells with a larger pixel size are excluded from processing."""
     mesoscope_stripe_borders: list[int] = field(default_factory=list)
-    """Stores the x-coordinates of mesoscope combined image stripe (ROI) borders. For mesoscope images, 'stripes' are
-    the individual imaging ROIs acquired in the 'multiple-ROI' mode. If this field is not overwritten by the user, the
-    pipeline will read the border data from the combined plane 'ops.npy' file generated by single-day suite2p pipeline.
+    """Stores the x-coordinates of combined mesoscope image stripe (ROI) borders. For mesoscope images, 'stripes' are
+    the individual imaging ROIs acquired in the 'multiple-ROI' mode. Keep this field set to an empty list to skip
+    stripe border-filtering or when working with non-mesoscope images.
     """
     stripe_margin: int = 30
     """The minimum required distance, in pixels, between the center-point (the median x-coordinate) of the cell (ROI)
     and the mesoscope stripe border. Cells that are too close to stripe borders are excluded from processing to avoid
-    ambiguities associated with tracking cells that span multiple stripes."""
+    ambiguities associated with tracking cells that span multiple stripes. This parameter is only used if
+    'mesoscope_stripe_borders' field is not set to an empty list."""
 @dataclass()
 class Registration:
-    """Stores parameters for aligning (registering) the sessions from multiple days to the same visual space.
-    Registration is used to create a 'shared' visual space, allowing to track the same cells (ROIs) across otherwise
-    variable visual space of each session.
+    """Stores parameters for aligning (registering) the sessions from multiple days to the same visual (sampling) space.
     """
     image_type: str = "enhanced"
-    """The type of single-day suite2p-generated image to use for across-day registration. Supported options are
+    """The type of suite2p-generated reference image to use for across-day registration. Supported options are
     'enhanced', 'mean' and 'max'. This 'template' image is used to calculate the necessary deformation (transformations)
     to register (align) all sessions to the same visual space."""
@@ -75,18 +105,12 @@ class Registration:
     speed_factor: float = 3
     """The relative force of the deformation transform applied when registering the sessions to the same visual space.
-    This is the most important parameter to tune."""
+    This is the most important parameter to tune. For most cases, a value between 1 and 5 is reasonable."""
 @dataclass()
 class Clustering:
-    """Stores parameters for clustering cell (ROI) masks across multiple registered sessions.
-    Clustering is used to track cells across sessions. If a group of ROIs across sessions is clustered together, it
-    is likely that they represent the same cell (ROI) across all sessions. This process involves first creating a
-    'template' mask that tracks a cell using the registered (deformed) visual space and then using this template to
-    track the cell in the original (non-deformed) visual space of each session.
-    """
+    """Stores parameters for tracking (clustering) cell (ROI) masks across multiple registered sessions (days)."""
     criterion: str = "distance"
     """Specifies the criterion for clustering (grouping) cell (ROI) masks from different sessions. Currently, the only
@@ -98,23 +122,25 @@ class Clustering:
     mask_prevalence: int = 50
     """Specifies the minimum percentage of all registered sessions that must include the clustered cell mask. Cell masks
-    present in fewer percent of sessions than this value are excluded from processing. This parameter is used to isolate
-    the cells that are present (active) across sessions."""
+    present in fewer percent of sessions than this value are excluded from processing. This parameter is used to filter
+    out cells that are mostly silent or not distinguishable across sessions."""
     pixel_prevalence: int = 50
-    """Specifies the minimum percentage of all registered sessions in which a pixel from a given cell mask must be
-    present for it to be used to construct the template mask. Pixels present in fewer percent of sessions than this
-    value are not used to define the 'template' mask coordinates. Template masks are used to extract the cell
-    fluorescence from the 'original' visual space of every session. This parameter is used to isolate the part of the
-    cell that is stable across sessions."""
+    """Specifies the minimum percentage of all registered sessions in which a cell mask pixel must be present for it to
+    be used to construct the template mask. Pixels present in fewer percent of sessions than this value are not used to
+    define the template masks. Template masks are used to extract the cell fluorescence from the original (non-deformed)
+    visual space of every session. This parameter is used to isolate the part of the cell that is stable across
+    sessions, which is required for the extraction step to work correctly (target only the tracked cell)."""
     step_sizes: list[int] = field(default_factory=lambda: [200, 200])
-    """Specifies the block size for the clustering process, in pixels. Clustering is applied in blocks of this size,
-    sampled across the processed plane image, to reduce the memory (RAM) overhead."""
+    """Specifies the block size for the cell clustering (across-session tracking) process, in pixels, in the order of
+    (height, width). To reduce the memory (RAM) overhead, the algorithm divides the deformed (shared) visual space into
+    blocks and then processes one (or more) blocks at a time."""
     bin_size: int = 50
     """Specifies the size of bins used to discover cell masks within blocks during clustering. To avoid edge cases, the
-    algorithm clusters the cell masks within the region defined by the center-point of each cell +- bin_size."""
+    algorithm clusters the cell masks within the region defined by the center-point of each cell +- bin_size. This works
+    on top of pre-sorting cells into spatial blocks defined by 'step_sizes'."""
     maximum_distance: int = 20
     """Specifies the maximum distance, in pixels, that can separate masks across multiple sessions. The clustering
@@ -122,72 +148,78 @@ class Clustering:
     cells during tacking."""
     minimum_size: int = 25
-    """The minimum size of the non-overlapping (with other cells) cell (ROI) region, in pixels, that has to be covered
-    by the template mask, for the cell to be assigned to that template. This is used to determine which template(s) the
-    cell belongs to (if any), for the purpose of tracking it across sessions."""
+    """The minimum size of the non-overlapping cell (ROI) region, in pixels, that has to be covered by the template
+    mask, for the cell to be assigned to that template. This is used to determine which template(s) the cell belongs to
+    (if any), for the purpose of tracking it across sessions."""
 @dataclass()
-class Demix:
-    """Stores settings used to deconvolve fluorescence signals from cells tracked across multiple days.
+class MultiDayS2PConfiguration(YamlConfig):
+    """Aggregates all parameters for the multi-day suite2p pipeline used to track cells across multiple days
+    (sessions) and extract their activity.
-    This step applies the suite2p spike deconvolution algorithm to the cell masks isolated during clustering to extract
-    the fluorescence of the cells tracked across multiple sessions (days). Generally, it should use the same parameters
-    as were used by the single-day suite2p pipeline.
+    These settings are used to configure the multi-day suite2p extraction pipeline, which is based on the reference
+    implementation here: https://github.com/sprustonlab/multiday-suite2p-public. This class behaves similar to the
+    SingleDayS2PConfiguration class. It can be saved and loaded from a .YAML file and translated to dictionary or
+    ops.npy format, expected by the multi-day sl-suite2p pipeline.
     """
-    baseline: str = "maximin"
-    """Specifies the method to compute the baseline of each trace. This baseline is then subtracted from each cell.
-    ‘maximin’ computes a moving baseline by filtering the data with a Gaussian of width 'sig_baseline' * 'fs', and then
-    minimum filtering with a window of 'win_baseline' * 'fs', and then maximum filtering with the same window.
-    ‘constant’ computes a constant baseline by filtering with a Gaussian of width 'sig_baseline' * 'fs' and then taking
-    the minimum value of this filtered trace. ‘constant_percentile’ computes a constant baseline by taking the
-    'prctile_baseline' percentile of the trace."""
+    io: IO = field(default_factory=IO)
+    """Stores parameters that control data input and output during various stages of the pipeline."""
+    hardware: Hardware = field(default_factory=Hardware)
+    """Stores parameters that control how the suite2p interacts with the hardware of the host-computer to accelerate
+    processing speed."""
+    cell_detection: CellDetection = field(default_factory=CellDetection)
+    """Stores parameters for selecting single-day-registered cells (ROIs) to be tracked across multiple sessions (days).
+    """
+    registration: Registration = field(default_factory=Registration)
+    """Stores parameters for aligning (registering) the sessions from multiple days to the same visual (sampling) space.
+    """
+    clustering: Clustering = field(default_factory=Clustering)
+    """Stores parameters for tracking (clustering) cell (ROI) masks across multiple registered sessions (days)."""
-    win_baseline: float = 60.0
-    """The time window, in seconds, over which to compute the baseline filter."""
+    def to_npy(self, output_directory: Path) -> None:
+        """Saves the managed configuration data as an 'ops.npy' file under the target directory.
-    sig_baseline: float = 10.0
-    """The standard deviation, in seconds, of the Gaussian filter applied to smooth the baseline signal."""
+        This method is mostly called by internal sl-suite2p functions to translate the user-specified configuration
+        file into the format used by suite2p pipelines.
-    l2_reg: float = 0.1
-    """The L2 regularization strength applied during spike deconvolution."""
+        Notes:
+            If the target output directory does not exist when this method is called, it will be created.
-    neucoeff: float = 0.7
-    """The neuropil coefficient applied for signal correction before deconvolution."""
+        Args:
+            output_directory: The path to the directory where the 'ops.npy' file should be saved.
+        """
+        ensure_directory_exists(output_directory)  # Creates the directory, if necessary
+        file_path = output_directory.joinpath("ops.npy")  # Computes the output path
+        np.save(file_path, self.to_ops(), allow_pickle=True)  # Dumps the configuration data to 'ops.npy' file.
+    def to_config(self, output_directory: Path) -> None:
+        """Saves the managed configuration data as a 'multi_day_s2p_configuration.yaml' file under the target
+        directory.
-@dataclass()
-class MultiDayS2PConfiguration(YamlConfig):
-    """Aggregates all parameters for the multi-day suite2p pipeline used to track cells across multiple days
-    (sessions) and extract their activity.
+        This method is typically used to dump the 'default' configuration parameters to disk as a user-editable
+        .yaml file. The user is then expected to modify these parameters as needed, before the class data is loaded and
+        used by the suite2p pipeline.
-    These settings are used to configure the multiday suite2p extraction pipeline, which is based on the reference
-    implementation here: https://github.com/sprustonlab/multiday-suite2p-public. This class behaves similar to the
-    SingleDayS2PConfiguration class. It can be saved and loaded from a .YAML file and translated to dictionary format,
-    expected by the multi-day sl-suite2p pipeline.
-    """
+        Notes:
+            If the target output directory does not exist when this method is called, it will be created.
-    cell_detection: CellDetection = field(default_factory=CellDetection)
-    """Stores parameters for selecting single-day-registered cells (ROIs) to be tracked across multiple sessions
-    (days)."""
-    registration: Registration = field(default_factory=Registration)
-    """Stores parameters for aligning (registering) the sessions from multiple days to the same visual space."""
-    clustering: Clustering = field(default_factory=Clustering)
-    """Stores parameters for clustering (tracking) cell (ROI) masks across multiple registered sessions."""
-    demix: Demix = field(default_factory=Demix)
-    """Stores settings used to deconvolve fluorescence signals from cells tracked across multiple days."""
-    io: IO = field(default_factory=IO)
-    """Stores parameters that control data input and output during various stages of the pipeline."""
+        Args:
+            output_directory: The path to the directory where the 'multi_day_s2p_configuration.yaml' file should be
+            saved.
+        """
+        ensure_directory_exists(output_directory)  # Creates the directory, if necessary
+        file_path = output_directory.joinpath("multi_day_s2p_configuration.yaml")   # Computes the output path
+        # Note, this uses the same configuration name as the SessionData class, making it automatically compatible with
+        # Sun lab data structure.
+        self.to_yaml(file_path=file_path)  # Dumps the data to a 'yaml' file.
     def to_ops(self) -> dict[str, Any]:
         """Converts the class instance to a dictionary and returns it to caller.
-        This dictionary can be passed to sl-suite2p multi-day functions as the 'ops' argument.
-        Notes:
-            Unlike the single-day configuration class, the dictionary generated by this method uses section names as
-            top level keys and parameter names as second-level keys. This mimics the original multiday-pipeline
-            configuration scheme.
+        This method is mostly called by internal sl-suite2p functions to translate the default configuration parameters
+        to the dictionary format used by suite2p pipelines.
         """
         return asdict(self)

sl-shared-assets 1.0.0rc15__py3-none-any.whl → 1.0.0rc17__py3-none-any.whl

Potentially problematic release.

sl-shared-assets 1.0.0rc15py3-none-any.whl → 1.0.0rc17py3-none-any.whl