PyPI - cellfinder - Versions diffs - 1.1.3__py3-none-any.whl → 1.3.0__py3-none-any.whl - Mend

cellfinder 1.1.3__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cellfinder might be problematic. Click here for more details.

Files changed (34) hide show

cellfinder/__init__.py +21 -12
cellfinder/core/classify/classify.py +13 -6
cellfinder/core/classify/cube_generator.py +27 -11
cellfinder/core/classify/resnet.py +9 -6
cellfinder/core/classify/tools.py +13 -11
cellfinder/core/detect/detect.py +12 -1
cellfinder/core/detect/filters/volume/ball_filter.py +198 -113
cellfinder/core/detect/filters/volume/structure_detection.py +105 -41
cellfinder/core/detect/filters/volume/structure_splitting.py +1 -1
cellfinder/core/detect/filters/volume/volume_filter.py +48 -49
cellfinder/core/download/cli.py +39 -32
cellfinder/core/download/download.py +44 -56
cellfinder/core/main.py +53 -68
cellfinder/core/tools/prep.py +12 -20
cellfinder/core/tools/source_files.py +5 -3
cellfinder/core/tools/system.py +10 -0
cellfinder/core/train/train_yml.py +29 -27
cellfinder/napari/curation.py +1 -1
cellfinder/napari/detect/detect.py +259 -58
cellfinder/napari/detect/detect_containers.py +11 -1
cellfinder/napari/detect/thread_worker.py +16 -2
cellfinder/napari/train/train.py +2 -9
cellfinder/napari/train/train_containers.py +3 -3
cellfinder/napari/utils.py +88 -47
{cellfinder-1.1.3.dist-info → cellfinder-1.3.0.dist-info}/METADATA +12 -11
{cellfinder-1.1.3.dist-info → cellfinder-1.3.0.dist-info}/RECORD +30 -34
cellfinder/core/download/models.py +0 -49
cellfinder/core/tools/IO.py +0 -48
cellfinder/core/tools/tf.py +0 -46
cellfinder/napari/images/brainglobe.png +0 -0
{cellfinder-1.1.3.dist-info → cellfinder-1.3.0.dist-info}/LICENSE +0 -0
{cellfinder-1.1.3.dist-info → cellfinder-1.3.0.dist-info}/WHEEL +0 -0
{cellfinder-1.1.3.dist-info → cellfinder-1.3.0.dist-info}/entry_points.txt +0 -0
{cellfinder-1.1.3.dist-info → cellfinder-1.3.0.dist-info}/top_level.txt +0 -0

cellfinder/core/detect/filters/volume/structure_detection.py CHANGED Viewed

@@ -1,14 +1,22 @@
 from dataclasses import dataclass
-from typing import Dict, Optional, TypeVar
+from typing import Dict, Optional, Tuple, TypeVar, Union
 import numba.typed
 import numpy as np
 import numpy.typing as npt
-from numba import njit
+from numba import njit, typed
 from numba.core import types
 from numba.experimental import jitclass
 from numba.types import DictType
+T = TypeVar("T")
+# type used for the domain of the volume - the size of the vol
+vol_np_type = np.int64
+vol_numba_type = types.int64
+# type used for the structure id
+sid_np_type = np.int64
+sid_numba_type = types.int64
 @dataclass
 class Point:
@@ -32,18 +40,15 @@ def get_non_zero_dtype_min(values: np.ndarray) -> int:
     return min_val
-T = TypeVar("T")
 @njit
 def traverse_dict(d: Dict[T, T], a: T) -> T:
     """
     Traverse d, until a is not present as a key.
     """
-    if a in d:
-        return traverse_dict(d, d[a])
-    else:
-        return a
+    value = a
+    while value in d:
+        value = d[value]
+    return value
 @njit
@@ -54,14 +59,28 @@ def get_structure_centre(structure: np.ndarray) -> np.ndarray:
     Centre calculated as the mean of each pixel coordinate,
     rounded to the nearest integer.
     """
-    # can't do np.mean(structure, axis=0)
-    # because axis is not supported by numba
+    # numba support axis for sum, but not mean
+    return np.round(np.sum(structure, axis=0) / structure.shape[0])
+@njit
+def _get_structure_centre(structure: types.ListType) -> np.ndarray:
+    # See get_structure_centre.
+    # this is for our own points stored as list optimized by numba
+    a_sum = 0.0
+    b_sum = 0.0
+    c_sum = 0.0
+    for a, b, c in structure:
+        a_sum += a
+        b_sum += b
+        c_sum += c
     return np.round(
         np.array(
             [
-                np.mean(structure[:, 0]),
-                np.mean(structure[:, 1]),
-                np.mean(structure[:, 2]),
+                a_sum / len(structure),
+                b_sum / len(structure),
+                c_sum / len(structure),
             ]
         )
     )
@@ -69,15 +88,18 @@ def get_structure_centre(structure: np.ndarray) -> np.ndarray:
 # Type declaration has to come outside of the class,
 # see https://github.com/numba/numba/issues/8808
-uint_2d_type = types.uint64[:, :]
+tuple_point_type = types.Tuple(
+    (vol_numba_type, vol_numba_type, vol_numba_type)
+)
+list_of_points_type = types.ListType(tuple_point_type)
 spec = [
-    ("z", types.uint64),
-    ("next_structure_id", types.uint64),
-    ("shape", types.UniTuple(types.int64, 2)),
-    ("obsolete_ids", DictType(types.int64, types.int64)),
-    ("coords_maps", DictType(types.uint64, uint_2d_type)),
+    ("z", vol_numba_type),
+    ("next_structure_id", sid_numba_type),
+    ("shape", types.UniTuple(vol_numba_type, 2)),
+    ("obsolete_ids", DictType(sid_numba_type, sid_numba_type)),
+    ("coords_maps", DictType(sid_numba_type, list_of_points_type)),
 ]
@@ -103,8 +125,12 @@ class CellDetector:
         are scanned.
     coords_maps :
         Mapping from structure ID to the coordinates of pixels within that
-        structure. Coordinates are stored in a 2D array, with the second
-        axis indexing (x, y, z) coordinates.
+        structure. Coordinates are stored in a list of (x, y, z) tuples of
+        the coordinates.
+        Use `get_structures` to get it as a dict whose values are each
+        a 2D array, where rows are points, and columns x, y, z of the
+        points.
     """
     def __init__(self, width: int, height: int, start_z: int):
@@ -123,11 +149,11 @@ class CellDetector:
         # Mapping from obsolete IDs to the IDs that they have been
         # made obsolete by
         self.obsolete_ids = numba.typed.Dict.empty(
-            key_type=types.int64, value_type=types.int64
+            key_type=sid_numba_type, value_type=sid_numba_type
         )
         # Mapping from IDs to list of points in that structure
         self.coords_maps = numba.typed.Dict.empty(
-            key_type=types.int64, value_type=uint_2d_type
+            key_type=sid_numba_type, value_type=list_of_points_type
         )
     def process(
@@ -136,7 +162,7 @@ class CellDetector:
         """
         Process a new plane.
         """
-        if [e for e in plane.shape[:2]] != [e for e in self.shape]:
+        if plane.shape[:2] != self.shape:
             raise ValueError("plane does not have correct shape")
         plane = self.connect_four(plane, previous_plane)
@@ -166,7 +192,7 @@ class CellDetector:
             for x in range(plane.shape[0]):
                 if plane[x, y] == SOMA_CENTRE_VALUE:
                     # Labels of structures below, left and behind
-                    neighbour_ids = np.zeros(3, dtype=np.uint64)
+                    neighbour_ids = np.zeros(3, dtype=sid_np_type)
                     # If in bounds look at neighbours
                     if x > 0:
                         neighbour_ids[0] = plane[x - 1, y]
@@ -191,17 +217,54 @@ class CellDetector:
     def get_cell_centres(self) -> np.ndarray:
         return self.structures_to_cells()
-    def get_coords_dict(self) -> Dict:
-        return self.coords_maps
+    def get_structures(self) -> Dict[int, np.ndarray]:
+        """
+        Gets the structures as a dict of structure IDs mapped to the 2D array
+        of structure points.
+        """
+        d = {}
+        for sid, points in self.coords_maps.items():
+            # numba silliness - it cannot handle
+            # `item = np.array(points, dtype=vol_np_type)` so we need to create
+            # array and then fill in the point
+            item = np.empty((len(points), 3), dtype=vol_np_type)
+            d[sid] = item
+            for i, point in enumerate(points):
+                item[i, :] = point
+        return d
+    def add_point(
+        self, sid: int, point: Union[tuple, list, np.ndarray]
+    ) -> None:
+        """
+        Add single 3d *point* to the structure with the given *sid*.
+        """
+        if sid not in self.coords_maps:
+            self.coords_maps[sid] = typed.List.empty_list(tuple_point_type)
+        self._add_point(sid, (int(point[0]), int(point[1]), int(point[2])))
-    def add_point(self, sid: int, point: np.ndarray) -> None:
+    def add_points(self, sid: int, points: np.ndarray):
         """
-        Add *point* to the structure with the given *sid*.
+        Adds ndarray of *points* to the structure with the given *sid*.
+        Each row is a 3d point.
         """
-        self.coords_maps[sid] = np.row_stack((self.coords_maps[sid], point))
+        if sid not in self.coords_maps:
+            self.coords_maps[sid] = typed.List.empty_list(tuple_point_type)
+        append = self.coords_maps[sid].append
+        pts = np.round(points).astype(vol_np_type)
+        for point in pts:
+            append((point[0], point[1], point[2]))
+    def _add_point(self, sid: int, point: Tuple[int, int, int]) -> None:
+        # sid must exist
+        self.coords_maps[sid].append(point)
     def add(
-        self, x: int, y: int, z: int, neighbour_ids: npt.NDArray[np.uint64]
+        self, x: int, y: int, z: int, neighbour_ids: npt.NDArray[sid_np_type]
     ) -> int:
         """
         For the current coordinates takes all the neighbours and find the
@@ -215,17 +278,16 @@ class CellDetector:
         """
         updated_id = self.sanitise_ids(neighbour_ids)
         if updated_id not in self.coords_maps:
-            self.coords_maps[updated_id] = np.zeros(
-                shape=(0, 3), dtype=np.uint64
+            self.coords_maps[updated_id] = typed.List.empty_list(
+                tuple_point_type
             )
         self.merge_structures(updated_id, neighbour_ids)
         # Add point for that structure
-        point = np.array([[x, y, z]], dtype=np.uint64)
-        self.add_point(updated_id, point)
+        self._add_point(updated_id, (int(x), int(y), int(z)))
         return updated_id
-    def sanitise_ids(self, neighbour_ids: npt.NDArray[np.uint64]) -> int:
+    def sanitise_ids(self, neighbour_ids: npt.NDArray[sid_np_type]) -> int:
         """
         Get the smallest ID of all the structures that are connected to IDs
         in `neighbour_ids`.
@@ -246,7 +308,7 @@ class CellDetector:
         return int(updated_id)
     def merge_structures(
-        self, updated_id: int, neighbour_ids: npt.NDArray[np.uint64]
+        self, updated_id: int, neighbour_ids: npt.NDArray[sid_np_type]
     ) -> None:
         """
         For all the neighbours, reassign all the points of neighbour to
@@ -261,14 +323,16 @@ class CellDetector:
             # minimise ID so if neighbour with higher ID, reassign its points
             # to current
             if neighbour_id > updated_id:
-                self.add_point(updated_id, self.coords_maps[neighbour_id])
+                self.coords_maps[updated_id].extend(
+                    self.coords_maps[neighbour_id]
+                )
                 self.coords_maps.pop(neighbour_id)
                 self.obsolete_ids[neighbour_id] = updated_id
     def structures_to_cells(self) -> np.ndarray:
-        cell_centres = np.empty((len(self.coords_maps.keys()), 3))
+        cell_centres = np.empty((len(self.coords_maps), 3))
         for idx, structure in enumerate(self.coords_maps.values()):
-            p = get_structure_centre(structure)
+            p = _get_structure_centre(structure)
             cell_centres[idx] = p
         return cell_centres

cellfinder/core/detect/filters/volume/structure_splitting.py CHANGED Viewed

@@ -71,7 +71,7 @@ def ball_filter_imgs(
     """
     # OPTIMISE: reuse ball filter instance
-    good_tiles_mask = np.ones((1, 1, volume.shape[2]), dtype=bool)
+    good_tiles_mask = np.ones((1, 1, volume.shape[2]), dtype=np.bool_)
     plane_width, plane_height = volume.shape[:2]

cellfinder/core/detect/filters/volume/volume_filter.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import math
+import multiprocessing.pool
 import os
+from functools import partial
 from queue import Queue
 from threading import Lock
 from typing import Any, Callable, List, Optional, Tuple
@@ -77,7 +79,7 @@ class VolumeFilter(object):
         locks: List[Lock],
         *,
         callback: Callable[[int], None],
-    ) -> List[Cell]:
+    ) -> None:
         progress_bar = tqdm(total=self.n_planes, desc="Processing planes")
         for z in range(self.n_planes):
             # Get result from the queue.
@@ -108,11 +110,13 @@ class VolumeFilter(object):
         progress_bar.close()
         logger.debug("3D filter done")
-        return self.get_results()
     def _run_filter(self) -> None:
         logger.debug(f"🏐 Ball filtering plane {self.z}")
-        self.ball_filter.walk()
+        # filtering original images, the images should be large enough in x/y
+        # to benefit from parallelization. Note: don't pass arg as keyword arg
+        # because numba gets stuck (probably b/c class jit is new)
+        self.ball_filter.walk(True)
         middle_plane = self.ball_filter.get_middle_plane()
         if self.save_planes:
@@ -134,7 +138,7 @@ class VolumeFilter(object):
         f_path = os.path.join(self.plane_directory, plane_name)
         tifffile.imsave(f_path, plane.T)
-    def get_results(self) -> List[Cell]:
+    def get_results(self, worker_pool: multiprocessing.Pool) -> List[Cell]:
         logger.info("Splitting cell clusters and writing results")
         max_cell_volume = sphere_volume(
@@ -142,62 +146,57 @@ class VolumeFilter(object):
         )
         cells = []
+        needs_split = []
+        structures = self.cell_detector.get_structures().items()
+        logger.debug(f"Processing {len(structures)} found cells")
-        logger.debug(
-            f"Processing {len(self.cell_detector.coords_maps.items())} cells"
-        )
-        for cell_id, cell_points in self.cell_detector.coords_maps.items():
+        # first get all the cells that are not clusters
+        for cell_id, cell_points in structures:
             cell_volume = len(cell_points)
             if cell_volume < max_cell_volume:
                 cell_centre = get_structure_centre(cell_points)
-                cells.append(
-                    Cell(
-                        (
-                            cell_centre[0],
-                            cell_centre[1],
-                            cell_centre[2],
-                        ),
-                        Cell.UNKNOWN,
-                    )
-                )
+                cells.append(Cell(cell_centre.tolist(), Cell.UNKNOWN))
             else:
                 if cell_volume < self.max_cluster_size:
-                    try:
-                        cell_centres = split_cells(
-                            cell_points, outlier_keep=self.outlier_keep
-                        )
-                    except (ValueError, AssertionError) as err:
-                        raise StructureSplitException(
-                            f"Cell {cell_id}, error; {err}"
-                        )
-                    for cell_centre in cell_centres:
-                        cells.append(
-                            Cell(
-                                (
-                                    cell_centre[0],
-                                    cell_centre[1],
-                                    cell_centre[2],
-                                ),
-                                Cell.UNKNOWN,
-                            )
-                        )
+                    needs_split.append((cell_id, cell_points))
                 else:
                     cell_centre = get_structure_centre(cell_points)
-                    cells.append(
-                        Cell(
-                            (
-                                cell_centre[0],
-                                cell_centre[1],
-                                cell_centre[2],
-                            ),
-                            Cell.ARTIFACT,
-                        )
-                    )
-        logger.debug("Finished splitting cell clusters.")
+                    cells.append(Cell(cell_centre.tolist(), Cell.ARTIFACT))
+        if not needs_split:
+            logger.debug("Finished splitting cell clusters - none found")
+            return cells
+        # now split clusters into cells
+        logger.debug(f"Splitting {len(needs_split)} clusters")
+        progress_bar = tqdm(
+            total=len(needs_split), desc="Splitting cell clusters"
+        )
+        # we are not returning Cell instances from func because it'd be pickled
+        # by multiprocess which slows it down
+        func = partial(_split_cells, outlier_keep=self.outlier_keep)
+        for cell_centres in worker_pool.imap_unordered(func, needs_split):
+            for cell_centre in cell_centres:
+                cells.append(Cell(cell_centre.tolist(), Cell.UNKNOWN))
+            progress_bar.update()
+        progress_bar.close()
+        logger.debug(
+            f"Finished splitting cell clusters. Found {len(cells)} total cells"
+        )
         return cells
+def _split_cells(arg, outlier_keep):
+    cell_id, cell_points = arg
+    try:
+        return split_cells(cell_points, outlier_keep=outlier_keep)
+    except (ValueError, AssertionError) as err:
+        raise StructureSplitException(f"Cell {cell_id}, error; {err}")
 def sphere_volume(radius: float) -> float:
     return (4 / 3) * math.pi * radius**3

cellfinder/core/download/cli.py CHANGED Viewed

@@ -1,17 +1,29 @@
-import tempfile
 from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
 from pathlib import Path
-from cellfinder.core.download import models
-from cellfinder.core.download.download import amend_user_configuration
+from cellfinder.core.download.download import (
+    DEFAULT_DOWNLOAD_DIRECTORY,
+    amend_user_configuration,
+    download_models,
+)
-home = Path.home()
-DEFAULT_DOWNLOAD_DIRECTORY = home / ".cellfinder"
-temp_dir = tempfile.TemporaryDirectory()
-temp_dir_path = Path(temp_dir.name)
+def download_parser(parser: ArgumentParser) -> ArgumentParser:
+    """
+    Configure the argument parser for downloading files.
+    Parameters
+    ----------
+    parser : ArgumentParser
+        The argument parser to configure.
+    Returns
+    -------
+    ArgumentParser
+        The configured argument parser.
+    """
-def download_directory_parser(parser):
     parser.add_argument(
         "--install-path",
         dest="install_path",
@@ -19,29 +31,12 @@ def download_directory_parser(parser):
         default=DEFAULT_DOWNLOAD_DIRECTORY,
         help="The path to install files to.",
     )
-    parser.add_argument(
-        "--download-path",
-        dest="download_path",
-        type=Path,
-        default=temp_dir_path,
-        help="The path to download files into.",
-    )
     parser.add_argument(
         "--no-amend-config",
         dest="no_amend_config",
         action="store_true",
         help="Don't amend the config file",
     )
-    return parser
-def model_parser(parser):
-    parser.add_argument(
-        "--no-models",
-        dest="no_models",
-        action="store_true",
-        help="Don't download the model",
-    )
     parser.add_argument(
         "--model",
         dest="model",
@@ -52,17 +47,29 @@ def model_parser(parser):
     return parser
-def download_parser():
+def get_parser() -> ArgumentParser:
+    """
+    Create an argument parser for downloading files.
+    Returns
+    -------
+    ArgumentParser
+        The configured argument parser.
+    """
     parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
-    parser = model_parser(parser)
-    parser = download_directory_parser(parser)
+    parser = download_parser(parser)
     return parser
-def main():
-    args = download_parser().parse_args()
-    if not args.no_models:
-        model_path = models.main(args.model, args.install_path)
+def main() -> None:
+    """
+    Run the main download function, and optionally amend the user
+    configuration.
+    """
+    args = get_parser().parse_args()
+    model_path = download_models(args.model, args.install_path)
     if not args.no_amend_config:
         amend_user_configuration(new_model_path=model_path)

cellfinder/core/download/download.py CHANGED Viewed

@@ -1,79 +1,67 @@
 import os
-import shutil
-import tarfile
-import urllib.request
 from pathlib import Path
+from typing import Literal
+import pooch
 from brainglobe_utils.general.config import get_config_obj
-from brainglobe_utils.general.system import disk_free_gb
+from cellfinder import DEFAULT_CELLFINDER_DIRECTORY
 from cellfinder.core.tools.source_files import (
     default_configuration_path,
     user_specific_configuration_path,
 )
+DEFAULT_DOWNLOAD_DIRECTORY = DEFAULT_CELLFINDER_DIRECTORY / "models"
-class DownloadError(Exception):
-    pass
+MODEL_URL = "https://gin.g-node.org/cellfinder/models/raw/master"
-def download_file(destination_path, file_url, filename):
-    direct_download = True
-    file_url = file_url.format(int(direct_download))
-    print(f"Downloading file: {filename}")
-    with urllib.request.urlopen(file_url) as response:
-        with open(destination_path, "wb") as outfile:
-            shutil.copyfileobj(response, outfile)
+model_filenames = {
+    "resnet50_tv": "resnet50_tv.h5",
+    "resnet50_all": "resnet50_weights.h5",
+}
+model_hashes = {
+    "resnet50_tv": "63d36af456640590ba6c896dc519f9f29861015084f4c40777a54c18c1fc4edd",  # noqa: E501
+    "resnet50_all": None,
+}
-def extract_file(tar_file_path, destination_path):
-    tar = tarfile.open(tar_file_path)
-    tar.extractall(path=destination_path)
-    tar.close()
+model_type = Literal["resnet50_tv", "resnet50_all"]
-# TODO: check that intermediate folders exist
-def download(
-    download_path,
-    url,
-    file_name,
-    install_path=None,
-    download_requires=None,
-    extract_requires=None,
-):
-    if not os.path.exists(os.path.dirname(download_path)):
-        raise DownloadError(
-            f"Could not find directory '{os.path.dirname(download_path)}' "
-            f"to download file: {file_name}"
-        )
-    if (download_requires is not None) and (
-        disk_free_gb(os.path.dirname(download_path)) < download_requires
-    ):
-        raise DownloadError(
-            f"Insufficient disk space in {os.path.dirname(download_path)} to"
-            f"download file: {file_name}"
-        )
+def download_models(
+    model_name: model_type, download_path: os.PathLike
+) -> Path:
+    """
+    For a given model name and download path, download the model file
+    and return the path to the downloaded file.
+    Parameters
+    ----------
+    model_name : model_type
+        The name of the model to be downloaded.
+    download_path : os.PathLike
+        The path where the model file will be downloaded.
-    if install_path is not None:
-        if not os.path.exists(install_path):
-            raise DownloadError(
-                f"Could not find directory '{install_path}' "
-                f"to extract file: {file_name}"
-            )
+    Returns
+    -------
+    Path
+        The path to the downloaded model file.
+    """
-        if (extract_requires is not None) and (
-            disk_free_gb(install_path) < extract_requires
-        ):
-            raise DownloadError(
-                f"Insufficient disk space in {install_path} to"
-                f"extract file: {file_name}"
-            )
+    download_path = Path(download_path)
+    filename = model_filenames[model_name]
+    model_path = pooch.retrieve(
+        url=f"{MODEL_URL}/{filename}",
+        known_hash=model_hashes[model_name],
+        path=download_path,
+        fname=filename,
+        progressbar=True,
+    )
-    download_file(download_path, url, file_name)
-    if install_path is not None:
-        extract_file(download_path, install_path)
-        os.remove(download_path)
+    return Path(model_path)
 def amend_user_configuration(new_model_path=None) -> None:
@@ -83,7 +71,7 @@ def amend_user_configuration(new_model_path=None) -> None:
     Parameters
     ----------
-    new_model_path : str, optional
+    new_model_path : Path, optional
         The path to the new model configuration.
     """
     print("(Over-)writing custom user configuration")

cellfinder 1.1.3__py3-none-any.whl → 1.3.0__py3-none-any.whl

Potentially problematic release.

cellfinder 1.1.3__py3-none-any.whl → 1.3.0__py3-none-any.whl