cellfinder-1.6.0-py3-none-any.whl → cellfinder-1.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
 import argparse
 
+from cellfinder.core import logger
+
 BRAINGLOBE_WORKFLOWS = "https://github.com/brainglobe/brainglobe-workflows"
 NEW_NAME = "brainmapper"
 BLOG_POST = "https://brainglobe.info/blog/version1/core_and_napari_merge.html"
@@ -36,7 +38,7 @@ def cli_catch() -> None:
         ),
     )
 
-    print(
+    logger.warning(
         "Hey, it looks like you're trying to run the old command-line tool.",
         "This workflow has been renamed and moved -",
         " you can now find it in the brainglobe-workflows package:\n",
@@ -19,8 +19,8 @@ def main(
     signal_array: types.array,
     background_array: types.array,
     n_free_cpus: int,
-    voxel_sizes: Tuple[int, int, int],
-    network_voxel_sizes: Tuple[int, int, int],
+    voxel_sizes: Tuple[float, float, float],
+    network_voxel_sizes: Tuple[float, float, float],
     batch_size: int,
     cube_height: int,
     cube_width: int,
@@ -29,12 +29,58 @@ def main(
     model_weights: Optional[os.PathLike],
     network_depth: depth_type,
     max_workers: int = 3,
+    pin_memory: bool = False,
     *,
     callback: Optional[Callable[[int], None]] = None,
 ) -> List[Cell]:
     """
     Parameters
     ----------
+
+    points: List of Cell objects
+        The potential cells to classify.
+    signal_array : numpy.ndarray or dask array
+        3D array representing the signal data in z, y, x order.
+    background_array : numpy.ndarray or dask array
+        3D array representing the signal data in z, y, x order.
+    n_free_cpus : int
+        How many CPU cores to leave free.
+    voxel_sizes : 3-tuple of floats
+        Size of your voxels in the z, y, and x dimensions.
+    network_voxel_sizes : 3-tuple of floats
+        Size of the pre-trained network's voxels in the z, y, and x dimensions.
+    batch_size : int
+        How many potential cells to classify at one time. The GPU/CPU
+        memory must be able to contain at once this many data cubes for
+        the models. For performance-critical applications, tune to maximize
+        memory usage without running out. Check your GPU/CPU memory to verify
+        it's not full.
+    cube_height: int
+        The height of the data cube centered on the cell used for
+        classification. Defaults to `50`.
+    cube_width: int
+        The width of the data cube centered on the cell used for
+        classification. Defaults to `50`.
+    cube_depth: int
+        The depth of the data cube centered on the cell used for
+        classification. Defaults to `20`.
+    trained_model : Optional[Path]
+        Trained model file path (home directory (default) -> pretrained
+        weights).
+    model_weights : Optional[Path]
+        Model weights path (home directory (default) -> pretrained
+        weights).
+    network_depth: str
+        The network depth to use during classification. Defaults to `"50"`.
+    max_workers: int
+        The number of sub-processes to use for data loading / processing.
+        Defaults to 8.
+    pin_memory: bool
+        Pins data to be sent to the GPU to the CPU memory. This allows faster
+        GPU data speeds, but can only be used if the data used by the GPU can
+        stay in the CPU RAM while the GPU uses it. I.e. there's enough RAM.
+        Otherwise, if there's a risk of the RAM being paged, it shouldn't be
+        used. Defaults to False.
     callback : Callable[int], optional
         A callback function that is called during classification. Called with
         the batch number once that batch has been classified.
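The new `pin_memory` flag (documented above, and wired into `VolumeFilter` further down) relies on PyTorch's page-locked host memory. A minimal PyTorch sketch of the mechanism itself, not cellfinder's internal code:

```python
# Pinned (page-locked) host memory enables faster, asynchronous
# host-to-GPU copies -- at the cost of RAM that cannot be paged out.
import torch

if torch.cuda.is_available():
    # Page-locked host allocation, as enabled by pin_memory=True.
    host_cubes = torch.empty((32, 50, 50, 20), pin_memory=True)
    # non_blocking=True only takes effect when the source tensor is pinned.
    gpu_cubes = host_cubes.to("cuda", non_blocking=True)
else:
    host_cubes = torch.empty((32, 50, 50, 20))  # pinning buys nothing on CPU
```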
@@ -70,7 +116,7 @@ def main(
     )
 
     if trained_model and Path(trained_model).suffix == ".h5":
-        print(
+        logger.warning(
             "Weights provided in place of the model, "
             "loading weights into default model."
         )
@@ -103,7 +149,7 @@ def main(
             points_list.append(cell)
 
     time_elapsed = datetime.now() - start_time
-    print(
+    logger.info(
         "Classfication complete - all points done in : {}".format(time_elapsed)
     )
 
@@ -47,9 +47,19 @@ def get_model(
             f"Setting model weights according to: {model_weights}",
         )
     if model_weights is None:
-        raise OSError("`model_weights` must be provided")
-    model.load_weights(model_weights)
-    return model
+        raise OSError(
+            "`model_weights` must be provided for inference "
+            "or continued training."
+        )
+    try:
+        model.load_weights(model_weights)
+    except (OSError, ValueError) as e:
+        raise ValueError(
+            f"Error loading weights: {model_weights}.\n"
+            "Provided weights don't match the model architecture.\n"
+        ) from e
+
+    return model
 
 
 def make_lists(
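The rewritten `get_model` converts backend weight-loading failures into a clear `ValueError`. The same pattern in isolation, with a toy Keras model and a hypothetical weights path:

```python
# The load-or-fail-clearly pattern from get_model, with a toy model.
# `weights_path` is hypothetical; cellfinder's real models differ.
import keras

model = keras.Sequential([keras.Input(shape=(8,)), keras.layers.Dense(4)])
weights_path = "model.weights.h5"  # hypothetical path

try:
    model.load_weights(weights_path)
except (OSError, ValueError) as e:
    # Chaining with `from e` keeps the backend's original traceback.
    raise ValueError(
        f"Error loading weights: {weights_path}.\n"
        "Provided weights don't match the model architecture.\n"
    ) from e
```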
@@ -48,12 +48,12 @@ def main(
     save_planes: bool = False,
     plane_directory: Optional[str] = None,
     batch_size: Optional[int] = None,
-    torch_device: str = "cpu",
-    use_scipy: bool = True,
-    split_ball_xy_size: int = 3,
-    split_ball_z_size: int = 3,
+    torch_device: Optional[str] = None,
+    pin_memory: bool = False,
+    split_ball_xy_size: float = 6,
+    split_ball_z_size: float = 15,
     split_ball_overlap_fraction: float = 0.8,
-    split_soma_diameter: int = 7,
+    n_splitting_iter: int = 10,
     *,
     callback: Optional[Callable[[int], None]] = None,
 ) -> List[Cell]:
@@ -62,69 +62,80 @@ def main(
 
     Parameters
     ----------
-    signal_array : numpy.ndarray
-        3D array representing the signal data.
-
+    signal_array : numpy.ndarray or dask array
+        3D array representing the signal data in z, y, x order.
     start_plane : int
-        Index of the starting plane for detection.
-
+        First plane index to process (inclusive, to process a subset of the
+        data).
     end_plane : int
-        Index of the ending plane for detection.
-
-    voxel_sizes : Tuple[float, float, float]
-        Tuple of voxel sizes in each dimension (z, y, x).
-
+        Last plane index to process (exclusive, to process a subset of the
+        data).
+    voxel_sizes : 3-tuple of floats
+        Size of your voxels in the z, y, and x dimensions (microns).
     soma_diameter : float
-        Diameter of the soma in physical units.
-
+        The expected in-plane (xy) soma diameter (microns).
     max_cluster_size : float
-        Maximum size of a cluster in physical units.
-
+        Largest detected cell cluster (in cubic um) where splitting
+        should be attempted. Clusters above this size will be labeled
+        as artifacts.
     ball_xy_size : float
-        Size of the XY ball used for filtering in physical units.
-
+        3d filter's in-plane (xy) filter ball size (microns).
     ball_z_size : float
-        Size of the Z ball used for filtering in physical units.
-
+        3d filter's axial (z) filter ball size (microns).
     ball_overlap_fraction : float
-        Fraction of overlap allowed between balls.
-
+        3d filter's fraction of the ball filter needed to be filled by
+        foreground voxels, centered on a voxel, to retain the voxel.
     soma_spread_factor : float
-        Spread factor for soma size.
-
+        Cell spread factor for determining the largest cell volume before
+        splitting up cell clusters. Structures with spherical volume of
+        diameter `soma_spread_factor * soma_diameter` or less will not be
+        split.
     n_free_cpus : int
-        Number of free CPU cores available for parallel processing.
-
+        How many CPU cores to leave free.
     log_sigma_size : float
-        Size of the sigma for the log filter.
-
+        Gaussian filter width (as a fraction of soma diameter) used during
+        2d in-plane Laplacian of Gaussian filtering.
     n_sds_above_mean_thresh : float
-        Number of standard deviations above the mean threshold.
-
+        Intensity threshold (the number of standard deviations above
+        the mean) of the filtered 2d planes used to mark pixels as
+        foreground or background.
     outlier_keep : bool, optional
         Whether to keep outliers during detection. Defaults to False.
-
     artifact_keep : bool, optional
         Whether to keep artifacts during detection. Defaults to False.
-
     save_planes : bool, optional
         Whether to save the planes during detection. Defaults to False.
-
     plane_directory : str, optional
         Directory path to save the planes. Defaults to None.
-
-    batch_size : int, optional
-        The number of planes to process in each batch. Defaults to 1.
-        For CPU, there's no benefit for a larger batch size. Only a memory
-        usage increase. For CUDA, the larger the batch size the better the
-        performance. Until it fills up the GPU memory - after which it
-        becomes slower.
-
+    batch_size: int
+        The number of planes of the original data volume to process at
+        once. The GPU/CPU memory must be able to contain this many planes
+        for all the filters. For performance-critical applications, tune to
+        maximize memory usage without running out. Check your GPU/CPU memory
+        to verify it's not full.
     torch_device : str, optional
-        The device on which to run the computation. By default, it's "cpu".
-        To run on a gpu, specify the PyTorch device name, such as "cuda" to
-        run on the first GPU.
-
+        The device on which to run the computation. If not specified (None),
+        "cuda" will be used if a GPU is available, otherwise "cpu".
+        You can also manually specify "cuda" or "cpu".
+    pin_memory: bool
+        Pins data to be sent to the GPU to the CPU memory. This allows faster
+        GPU data speeds, but can only be used if the data used by the GPU can
+        stay in the CPU RAM while the GPU uses it. I.e. there's enough RAM.
+        Otherwise, if there's a risk of the RAM being paged, it shouldn't be
+        used. Defaults to False.
+    split_ball_xy_size: float
+        Similar to `ball_xy_size`, except the value to use for the 3d
+        filter during cluster splitting.
+    split_ball_z_size: float
+        Similar to `ball_z_size`, except the value to use for the 3d filter
+        during cluster splitting.
+    split_ball_overlap_fraction: float
+        Similar to `ball_overlap_fraction`, except the value to use for the
+        3d filter during cluster splitting.
+    n_splitting_iter: int
+        The number of iterations to run the 3d filtering on a cluster. Each
+        iteration reduces the cluster size by the voxels not retained in
+        the previous iteration.
     callback : Callable[int], optional
         A callback function that is called every time a plane has finished
         being processed. Called with the plane number that has finished.
@@ -132,9 +143,11 @@ def main(
     Returns
     -------
     List[Cell]
-        List of detected cells.
+        List of detected cell candidates.
     """
     start_time = datetime.now()
+    if torch_device is None:
+        torch_device = "cuda" if torch.cuda.is_available() else "cpu"
     if batch_size is None:
         if torch_device == "cpu":
             batch_size = 4
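`torch_device` changed from a hard-coded `"cpu"` default to `None`, resolved at runtime. The resolution logic as a standalone snippet (the GPU branch of the `batch_size` default lies outside this hunk, so it is not shown):

```python
# Standalone version of the new device default: prefer CUDA when available.
import torch

def resolve_device(torch_device=None):
    if torch_device is None:
        torch_device = "cuda" if torch.cuda.is_available() else "cpu"
    return torch_device.lower()
```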
@@ -155,6 +168,12 @@ def main(
     end_plane = min(len(signal_array), end_plane)
 
     torch_device = torch_device.lower()
+    # Use SciPy filtering on CPU (better performance); use PyTorch on GPU
+    if torch_device != "cuda":
+        use_scipy = True
+    else:
+        use_scipy = False
+
     batch_size = max(batch_size, 1)
     # brainmapper can pass them in as str
     voxel_sizes = list(map(float, voxel_sizes))
@@ -180,19 +199,16 @@ def main(
         plane_directory=plane_directory,
         batch_size=batch_size,
         torch_device=torch_device,
+        pin_memory=pin_memory,
+        n_splitting_iter=n_splitting_iter,
     )
 
     # replicate the settings specific to splitting, before we access anything
     # of the original settings, causing cached properties
     kwargs = dataclasses.asdict(settings)
-    kwargs["ball_z_size_um"] = split_ball_z_size * settings.z_pixel_size
-    kwargs["ball_xy_size_um"] = (
-        split_ball_xy_size * settings.in_plane_pixel_size
-    )
+    kwargs["ball_z_size_um"] = split_ball_z_size
+    kwargs["ball_xy_size_um"] = split_ball_xy_size
     kwargs["ball_overlap_fraction"] = split_ball_overlap_fraction
-    kwargs["soma_diameter_um"] = (
-        split_soma_diameter * settings.in_plane_pixel_size
-    )
     # always run on cpu because copying to gpu overhead is likely slower than
     # any benefit for detection on smallish volumes
     kwargs["torch_device"] = "cpu"
@@ -231,6 +247,5 @@ def main(
 
     time_elapsed = datetime.now() - start_time
     s = f"Detection complete. Found {len(cells)} cells in {time_elapsed}"
-    logger.debug(s)
-    print(s)
+    logger.info(s)
     return cells
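Taken together, the detection changes are user-visible: `use_scipy` is gone (selected automatically per device), `split_ball_xy_size`/`split_ball_z_size` are now given in microns rather than voxels (hence the new defaults of 6 and 15, matching the main filter), and `n_splitting_iter` replaces `split_soma_diameter`. A hedged call sketch follows; the module path `cellfinder.core.detect.detect` and the exact set of required arguments are assumptions to check against 1.8.0, though the keyword names follow the signature shown above.

```python
# Hedged usage sketch for the updated detection API (verify import path
# and required arguments against cellfinder 1.8.0).
import numpy as np
from cellfinder.core.detect import detect  # assumed module layout

signal = np.random.default_rng(0).poisson(5, (40, 256, 256)).astype(np.uint16)

cells = detect.main(
    signal_array=signal,
    start_plane=0,
    end_plane=len(signal),
    voxel_sizes=(5.0, 2.0, 2.0),   # microns, (z, y, x)
    soma_diameter=16.0,            # microns, in-plane
    max_cluster_size=100_000.0,    # cubic microns
    ball_xy_size=6.0,              # microns
    ball_z_size=15.0,              # microns
    ball_overlap_fraction=0.6,
    soma_spread_factor=1.4,
    n_free_cpus=2,
    log_sigma_size=0.2,
    n_sds_above_mean_thresh=10.0,
    torch_device=None,             # new default: auto-select "cuda"/"cpu"
    pin_memory=False,              # new flag
    split_ball_xy_size=6.0,        # now microns, previously voxels
    split_ball_z_size=15.0,        # now microns, previously voxels
    n_splitting_iter=10,           # replaces split_soma_diameter
)
```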
@@ -39,7 +39,7 @@ class TileProcessor:
         Number of standard deviations above the mean threshold to use for
         determining whether a voxel is bright.
     log_sigma_size : float
-        Size of the sigma for the gaussian filter.
+        Size of the Gaussian sigma for the Laplacian of Gaussian filtering.
     soma_diameter : float
         Diameter of the soma in voxels.
     torch_device: str
@@ -80,23 +80,28 @@ class DetectionSettings:
 
     voxel_sizes: Tuple[float, float, float] = (1.0, 1.0, 1.0)
     """
-    Tuple of voxel sizes in each dimension (z, y, x). We use this to convert
-    from `um` to pixel sizes.
+    Tuple of voxel sizes (microns) in each dimension (z, y, x). We use this
+    to convert from `um` to pixel sizes.
     """
 
     soma_spread_factor: float = 1.4
-    """Spread factor for soma size - how much it may stretch in the images."""
+    """
+    Cell spread factor for determining the largest cell volume before
+    splitting up cell clusters. Structures with spherical volume of
+    diameter `soma_spread_factor * soma_diameter` or less will not be
+    split.
+    """
 
     soma_diameter_um: float = 16
     """
-    Diameter of a typical soma in um. Bright areas larger than this will be
-    split.
+    Diameter of a typical soma in-plane (xy) in microns.
     """
 
     max_cluster_size_um3: float = 100_000
     """
-    Maximum size of a cluster (bright area) that will be processed, in um.
-    Larger bright areas are skipped as artifacts.
+    Largest detected cell cluster (in cubic um) where splitting
+    should be attempted. Clusters above this size will be labeled
+    as artifacts.
     """
 
     ball_xy_size_um: float = 6
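The `voxel_sizes` docstring describes the um-to-pixel conversion the settings perform. The arithmetic it implies, illustratively (not the package's internal code):

```python
# Illustrative um -> voxel conversion implied by the docstring above.
voxel_sizes = (5.0, 2.0, 2.0)  # microns per voxel in (z, y, x)
ball_xy_size_um = 6.0
ball_z_size_um = 15.0

in_plane_pixel_size = voxel_sizes[1]  # assuming square in-plane voxels
ball_xy_size_voxels = ball_xy_size_um / in_plane_pixel_size  # -> 3.0
ball_z_size_voxels = ball_z_size_um / voxel_sizes[0]         # -> 3.0
```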
@@ -116,17 +121,21 @@ class DetectionSettings:
 
     ball_overlap_fraction: float = 0.6
     """
-    Fraction of overlap between a bright area and the spherical kernel,
-    for the area to be considered a single ball.
+    Fraction of the 3d ball filter needed to be filled by foreground voxels,
+    centered on a voxel, to retain the voxel.
     """
 
     log_sigma_size: float = 0.2
-    """Size of the sigma for the 2d Gaussian filter."""
+    """
+    Gaussian filter width (as a fraction of soma diameter) used during
+    2d in-plane Laplacian of Gaussian filtering.
+    """
 
     n_sds_above_mean_thresh: float = 10
     """
-    Number of standard deviations above the mean intensity to use for a
-    threshold to define bright areas. Below it, it's not considered bright.
+    Intensity threshold (the number of standard deviations above
+    the mean) of the filtered 2d planes used to mark pixels as
+    foreground or background.
     """
 
     outlier_keep: bool = False
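The reworded `n_sds_above_mean_thresh` doc amounts to a per-plane intensity cutoff. As a NumPy sketch (illustrative, not the package's filtering code):

```python
# The foreground cutoff implied by n_sds_above_mean_thresh.
import numpy as np

filtered_plane = np.random.default_rng(0).normal(100.0, 10.0, (64, 64))
n_sds_above_mean_thresh = 10.0

threshold = (
    filtered_plane.mean() + n_sds_above_mean_thresh * filtered_plane.std()
)
foreground = filtered_plane > threshold  # boolean mask of bright pixels
```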
@@ -180,6 +189,14 @@ class DetectionSettings:
     to run on the first GPU.
     """
 
+    pin_memory: bool = False
+    """
+    Pins data to be sent to the GPU to the CPU memory. This allows faster GPU
+    data speeds, but can only be used if the data used by the GPU can stay in
+    the CPU RAM while the GPU uses it. I.e. there's enough RAM. Otherwise, if
+    there's a risk of the RAM being paged, it shouldn't be used.
+    """
+
     n_free_cpus: int = 2
     """
     Number of free CPU cores to keep available and not use during parallel
@@ -191,6 +208,8 @@ class DetectionSettings:
     """
     During the structure splitting phase we iteratively shrink the bright areas
     and re-filter with the 3d filter. This is the number of iterations to do.
+    Each iteration reduces the cluster size by the voxels not retained in the
+    previous iteration.
 
     This is a maximum because we also stop if there are no more structures left
     during any iteration.
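The added sentence pins down the shrinking behaviour: each pass keeps only the voxels the 3d filter retained and re-filters that smaller set. Schematically, with a toy stand-in for the real spherical filter:

```python
# Schematic of the shrink-and-refilter loop described above. toy_filter is
# a stand-in (drops the dimmest voxel each pass), not cellfinder's 3d filter.
def toy_filter(voxels):
    return sorted(voxels)[1:]

def split_iterations(cluster_voxels, n_splitting_iter=10):
    for _ in range(n_splitting_iter):
        retained = toy_filter(cluster_voxels)
        if not retained:
            break  # also stop once no structures are left
        cluster_voxels = retained
    return cluster_voxels

print(split_iterations([3, 7, 1, 9, 5], n_splitting_iter=3))  # [7, 9]
```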
@@ -78,11 +78,11 @@ class BallFilter:
     ----------
     plane_height, plane_width : int
         Height/width of the planes.
-    ball_xy_size : int
-        Diameter of the spherical kernel in the x/y dimensions.
-    ball_z_size : int
-        Diameter of the spherical kernel in the z dimension.
-        Equal to the number of planes stacked to filter
+    ball_xy_size : float
+        Diameter of the spherical kernel (in microns) in the x/y dimensions.
+    ball_z_size : float
+        Diameter of the spherical kernel in the z dimension in microns.
+        Determines the number of planes stacked to filter
         the central plane of the stack.
     overlap_fraction : float
         The fraction of pixels within the spherical kernel that
@@ -1,3 +1,4 @@
+from copy import copy
 from typing import List, Tuple, Type
 
 import numpy as np
@@ -224,6 +225,7 @@ def split_cells(
         where M is the number of individual cells and each centre is
         represented by its x, y, and z coordinates.
     """
+    settings = copy(settings)
    # these points are in x, y, z order columnwise, in absolute pixels
     orig_centre = get_structure_centre(cell_points)
 
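`split_cells` now shallow-copies its settings before use, so splitting-specific mutations (and cached properties triggered during splitting) cannot leak back to the caller's object. The hazard in miniature, with a toy dataclass:

```python
# Why copy first: mutating shared settings would leak overrides back to
# the caller. Toy dataclass, not cellfinder's DetectionSettings.
from copy import copy
from dataclasses import dataclass

@dataclass
class ToySettings:
    torch_device: str = "cuda"

original = ToySettings()
local = copy(original)       # shallow copy, as split_cells now does
local.torch_device = "cpu"   # splitting always runs on cpu

assert original.torch_device == "cuda"  # caller's settings untouched
```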
@@ -140,7 +140,7 @@ class VolumeFilter:
         tensor = torch.empty(
             (batch_size, *self.settings.plane_shape),
             dtype=torch_dtype,
-            pin_memory=not cpu,
+            pin_memory=not cpu and self.settings.pin_memory,
             device="cpu",
         )
 
@@ -6,6 +6,7 @@ import pooch
 from brainglobe_utils.general.config import get_config_obj
 
 from cellfinder import DEFAULT_CELLFINDER_DIRECTORY
+from cellfinder.core import logger
 from cellfinder.core.tools.source_files import (
     default_configuration_path,
     user_specific_configuration_path,
@@ -74,7 +75,7 @@ def amend_user_configuration(new_model_path=None) -> None:
     new_model_path : Path, optional
         The path to the new model configuration.
     """
-    print("(Over-)writing custom user configuration")
+    logger.info("(Over-)writing custom user configuration")
 
     original_config = default_configuration_path()
     new_config = user_specific_configuration_path()