PyPI - dcnum - Versions diffs - 0.22.1__tar.gz → 0.23.1__tar.gz - Mend

dcnum 0.22.1__tar.gz → 0.23.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dcnum might be problematic. Click here for more details.

Files changed (118) hide show

{dcnum-0.22.1 → dcnum-0.23.1}/.github/workflows/check.yml RENAMED Viewed

@@ -31,7 +31,7 @@ jobs:
       run: |
         # https://github.com/luispedro/mahotas/issues/144
         pip install mahotas==1.4.13
-        pip install -e .
+        pip install .[torch]
     - name: List installed packages
       run: |
         pip freeze

{dcnum-0.22.1 → dcnum-0.23.1}/CHANGELOG RENAMED Viewed

@@ -1,3 +1,10 @@
+0.23.1
+ - enh: support passing custom default arguments to get_class_method_info
+ - tests: fix torch preprocessing tests
+0.23.0
+ - feat: implement segmentation using PyTorch models
+ - fix: always compute image_bg if it is not in the input file
+ - enh: introduce `Segmenter.validate_applicability` method
 0.22.1
  - fix: compute pipeline identifier of origin dataset for basin mapping
 0.22.0

{dcnum-0.22.1 → dcnum-0.23.1}/PKG-INFO RENAMED Viewed

@@ -1,8 +1,8 @@
 Metadata-Version: 2.1
 Name: dcnum
-Version: 0.22.1
+Version: 0.23.1
 Summary: numerics toolbox for imaging deformability cytometry
-Author: Maximilian Schlögel, Paul Müller
+Author: Maximilian Schlögel, Paul Müller, Raghava Alajangi
 Maintainer-email: Paul Müller <dev@craban.de>
 License: MIT
 Project-URL: source, https://github.com/DC-Analysis/dcnum
@@ -25,6 +25,8 @@ Requires-Dist: numpy>=1.21
 Requires-Dist: opencv-python-headless
 Requires-Dist: scikit-image
 Requires-Dist: scipy>=1.8.0
+Provides-Extra: torch
+Requires-Dist: torch>=2.3; extra == "torch"
 |dcnum|
 =======

{dcnum-0.22.1 → dcnum-0.23.1}/pyproject.toml RENAMED Viewed

@@ -8,6 +8,7 @@ authors = [
     # In alphabetical order.
     {name = "Maximilian Schlögel"},
     {name = "Paul Müller"},
+    {name = "Raghava Alajangi"},
 ]
 maintainers = [
     {name = "Paul Müller", email="dev@craban.de"},
@@ -35,6 +36,9 @@ dependencies = [
 ]
 dynamic = ["version"]
+[project.optional-dependencies]
+torch = ["torch>=2.3"]
 [project.urls]
 source = "https://github.com/DC-Analysis/dcnum"
 tracker = "https://github.com/DC-Analysis/dcnum/issues"

{dcnum-0.22.1 → dcnum-0.23.1}/src/dcnum/_version.py RENAMED Viewed

@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.22.1'
-__version_tuple__ = version_tuple = (0, 22, 1)
+__version__ = version = '0.23.1'
+__version_tuple__ = version_tuple = (0, 23, 1)

{dcnum-0.22.1 → dcnum-0.23.1}/src/dcnum/feat/feat_background/base.py RENAMED Viewed

@@ -130,7 +130,7 @@ class Background(abc.ABC):
         """Return a unique background pipeline identifier
         The pipeline identifier is universally applicable and must
-        be backwards-compatible (future versions of dcevent will
+        be backwards-compatible (future versions of dcnum will
         correctly acknowledge the ID).
         The segmenter pipeline ID is defined as::

{dcnum-0.22.1 → dcnum-0.23.1}/src/dcnum/feat/feat_texture/tex_all.py RENAMED Viewed

@@ -6,6 +6,34 @@ from .common import haralick_names
 def haralick_texture_features(
         mask, image=None, image_bg=None, image_corr=None):
+    """Compute Haralick texture features
+    The following texture features are excluded
+    - feature 6 "Sum Average", which is equivalent to `2 * bright_bc_avg`
+      since dclab 0.44.0
+    - feature 10 "Difference Variance", because it has a functional
+      dependency on the offset value and since we do background correction,
+      we are not interested in it
+    - feature 14, because nobody is using it, it is not understood by
+      everyone what it actually is, and it is computationally expensive.
+    This leaves us with the following 11 texture features (22 if you count
+    avg and ptp):
+    https://earlglynn.github.io/RNotes/package/EBImage/Haralick-Textural-Features.html
+    - 1. `tex_asm`: (1) Angular Second Moment
+    - 2. `tex_con`: (2) Contrast
+    - 3. `tex_cor`: (3) Correlation
+    - 4. `tex_var`: (4) Variance
+    - 5. `tex_idm`: (5) Inverse Difference Moment
+    - 6. `tex_sva`: (7) Sum Variance
+    - 7. `tex_sen`: (8) Sum Entropy
+    - 8. `tex_ent`: (9) Entropy
+    - 9. `tex_den`: (11) Difference Entropy
+    - 10. `tex_f12`: (12) Information Measure of Correlation 1
+    - 11. `tex_f13`: (13) Information Measure of Correlation 2
+    """
     # make sure we have a boolean array
     mask = np.array(mask, dtype=bool)
     size = mask.shape[0]
@@ -22,7 +50,6 @@ def haralick_texture_features(
     for ii in range(size):
         # Haralick texture features
-        # https://gitlab.gwdg.de/blood_data_analysis/dcevent/-/issues/20
         # Preprocessing:
         # - create a copy of the array (don't edit `image_corr`)
         # - add grayscale values (negative values not supported)

{dcnum-0.22.1 → dcnum-0.23.1}/src/dcnum/feat/gate.py RENAMED Viewed

@@ -20,7 +20,7 @@ class Gate:
         Parameters
         ----------
         data: .HDF5Data
-            dcevent data instance
+            dcnum data instance
         online_gates: bool
             set to True to enable gating with "online" gates stored
             in the input file; online gates are applied in real-time
@@ -95,7 +95,7 @@ class Gate:
         """Return a unique gating pipeline identifier
         The pipeline identifier is universally applicable and must
-        be backwards-compatible (future versions of dcevent will
+        be backwards-compatible (future versions of dcnum will
         correctly acknowledge the ID).
         The gating pipeline ID is defined as::

{dcnum-0.22.1 → dcnum-0.23.1}/src/dcnum/feat/queue_event_extractor.py RENAMED Viewed

@@ -266,7 +266,7 @@ class QueueEventExtractor:
         """Return a unique feature extractor pipeline identifier
         The pipeline identifier is universally applicable and must
-        be backwards-compatible (future versions of dcevent will
+        be backwards-compatible (future versions of dcnum will
         correctly acknowledge the ID).
         The feature extractor pipeline ID is defined as::

{dcnum-0.22.1 → dcnum-0.23.1}/src/dcnum/logic/ctrl.py RENAMED Viewed

@@ -339,7 +339,8 @@ class DCNumJobRunner(threading.Thread):
         # hash sanity check above, check the generation, input data,
         # and background pipeline identifiers.
         redo_bg = (
-            (datdict["gen_id"] != self.ppdict["gen_id"])
+            "image_bg" not in self.draw
+            or (datdict["gen_id"] != self.ppdict["gen_id"])
             or (datdict["dat_id"] != self.ppdict["dat_id"])
             or (datdict["bg_id"] != self.ppdict["bg_id"]))

{dcnum-0.22.1 → dcnum-0.23.1}/src/dcnum/meta/ppid.py RENAMED Viewed

@@ -59,7 +59,9 @@ def convert_to_dtype(value, dtype):
 def get_class_method_info(class_obj: ClassWithPPIDCapabilities,
-                          static_kw_methods: List = None):
+                          static_kw_methods: List = None,
+                          static_kw_defaults: Dict = None,
+                          ):
     """Return dictionary of class info with static keyword methods docs
     Parameters
@@ -69,7 +71,16 @@ def get_class_method_info(class_obj: ClassWithPPIDCapabilities,
     static_kw_methods: list of callable
         The methods to inspect; all kwargs-only keyword arguments
         are extracted.
+    static_kw_defaults: dict
+        If a key in this dictionary matches an item in `static_kw_methods`
+        and its value is non-empty, that value is returned in the "defaults"
+        dictionary instead of the inspected keyword-only defaults. This is
+        used when a base class implements some annotations that the
+        subclass does not actually use, e.g. because they are taken from a
+        property, as is the case for mask postprocessing of segmenters.
     """
+    if static_kw_defaults is None:
+        static_kw_defaults = {}
     doc = class_obj.__doc__ or class_obj.__init__.__doc__
     info = {
         "code": class_obj.get_ppid_code(),
@@ -82,7 +93,10 @@ def get_class_method_info(class_obj: ClassWithPPIDCapabilities,
         for mm in static_kw_methods:
             meth = getattr(class_obj, mm)
             spec = inspect.getfullargspec(meth)
-            defau[mm] = spec.kwonlydefaults or {}
+            if mm_defaults := static_kw_defaults.get(mm):
+                defau[mm] = mm_defaults
+            else:
+                defau[mm] = spec.kwonlydefaults or {}
             annot[mm] = spec.annotations
         info["defaults"] = defau
         info["annotations"] = annot

{dcnum-0.22.1 → dcnum-0.23.1}/src/dcnum/segm/__init__.py RENAMED Viewed

@@ -1,6 +1,9 @@
 # flake8: noqa: F401
-from .segmenter import Segmenter, get_available_segmenters
+from .segmenter import (
+    Segmenter, SegmenterNotApplicableError, get_available_segmenters
+)
 from .segmenter_mpo import MPOSegmenter
 from .segmenter_sto import STOSegmenter
 from .segmenter_manager_thread import SegmenterManagerThread
 from . import segm_thresh
+from . import segm_torch

dcnum-0.23.1/src/dcnum/segm/segm_torch/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+import importlib
+try:
+    torch = importlib.import_module("torch")
+    req_maj = 2
+    req_min = 3
+    ver_tuple = torch.__version__.split(".")
+    act_maj = int(ver_tuple[0])
+    act_min = int(ver_tuple[1])
+    if act_maj < req_maj or (act_maj == req_maj and act_min < req_min):
+        raise ValueError(f"Your PyTorch version {act_maj}.{act_min} is not "
+                         f"supported, please update to at least "
+                         f"{req_maj}.{req_min}")
+except ImportError:
+    pass
+else:
+    from .segm_torch_mpo import SegmentTorchMPO  # noqa: F401
+    if torch.cuda.is_available():
+        from .segm_torch_sto import SegmentTorchSTO  # noqa: F401

dcnum-0.23.1/src/dcnum/segm/segm_torch/segm_torch_base.py ADDED Viewed

@@ -0,0 +1,125 @@
+import functools
+import pathlib
+import re
+from typing import Dict
+from ...meta import paths
+from ..segmenter import Segmenter, SegmenterNotApplicableError
+from .torch_model import load_model
+class TorchSegmenterBase(Segmenter):
+    """Torch segmenters that use a pretrained model for segmentation"""
+    requires_background_correction = False
+    mask_postprocessing = True
+    mask_default_kwargs = {
+        "clear_border": True,
+        "fill_holes": True,
+        "closing_disk": 0,
+    }
+    @classmethod
+    def get_ppid_from_ppkw(cls, kwargs, kwargs_mask=None):
+        kwargs_new = kwargs.copy()
+        # Make sure that the `model_file` kwarg is actually just a filename
+        # so that the pipeline identifier only contains the name, but not
+        # the full path.
+        if "model_file" in kwargs:
+            model_file = kwargs["model_file"]
+            mpath = pathlib.Path(model_file)
+            if mpath.exists():
+                # register the location of the file in the search path
+                # registry so other threads/processes will find it.
+                paths.register_search_path("torch_model_files", mpath.parent)
+                kwargs_new["model_file"] = mpath.name
+        return super(TorchSegmenterBase, cls).get_ppid_from_ppkw(kwargs_new,
+                                                                 kwargs_mask)
+    @classmethod
+    def validate_applicability(cls,
+                               segmenter_kwargs: Dict,
+                               meta: Dict = None,
+                               logs: Dict = None):
+        """Validate the applicability of this segmenter for a dataset
+        The applicability is defined by the metadata in the segmentation
+        model.
+        Parameters
+        ----------
+        segmenter_kwargs: dict
+            Keyword arguments for the segmenter
+        meta: dict
+            Dictionary of metadata from an :class:`HDF5Data` instance
+        logs: dict
+            Dictionary of logs from an :class:`HDF5Data` instance
+        Returns
+        -------
+        applicable: bool
+            True if the segmenter is applicable to the dataset
+        Raises
+        ------
+        SegmenterNotApplicableError
+            If the segmenter is not applicable to the dataset
+        """
+        if "model_file" not in segmenter_kwargs:
+            raise ValueError("A `model_file` must be provided in the "
+                             "`segmenter_kwargs` to validate applicability")
+        model_file = segmenter_kwargs["model_file"]
+        _, model_meta = load_model(model_file, device="cpu")
+        reasons_list = []
+        validators = {
+            "meta": functools.partial(
+                cls._validate_applicability_item,
+                data_dict=meta,
+                reasons_list=reasons_list),
+            "logs": functools.partial(
+                cls._validate_applicability_item,
+                # convert logs to strings
+                data_dict={key: "\n".join(val) for key, val in logs.items()},
+                reasons_list=reasons_list)
+        }
+        for item in model_meta.get("validation", []):
+            it = item["type"]
+            if it in validators:
+                validators[it](item)
+            else:
+                reasons_list.append(
+                    f"invalid validation type {it} in {model_file}")
+        if reasons_list:
+            raise SegmenterNotApplicableError(segmenter_class=cls,
+                                              reasons_list=reasons_list)
+        return True
+    @staticmethod
+    def _validate_applicability_item(item, data_dict, reasons_list):
+        """Populate `reasons_list` with invalid entries
+        Example `item`::
+            {"type": "meta",
+             "key": "setup:region",
+             "allow-missing-key": False,
+             "regexp": "^channel$",
+             "regexp-negate": False,
+             "reason": "only channel region supported",
+             }
+        """
+        key = item["key"]
+        if key in data_dict:
+            regexp = re.compile(item["regexp"])
+            matched = bool(regexp.match(data_dict[key]))
+            negate = item.get("regexp-negate", False)
+            valid = matched if not negate else not matched
+            if not valid:
+                reasons_list.append(item.get("reason", "unknown reason"))
+        elif not item.get("allow-missing-key", False):
+            reasons_list.append(f"Key '{key}' missing in {item['type']}")

dcnum-0.23.1/src/dcnum/segm/segm_torch/segm_torch_mpo.py ADDED Viewed

@@ -0,0 +1,71 @@
+import numpy as np
+import torch
+from ..segmenter_mpo import MPOSegmenter
+from .segm_torch_base import TorchSegmenterBase
+from .torch_model import load_model
+from .torch_preproc import preprocess_images
+from .torch_postproc import postprocess_masks
+class SegmentTorchMPO(TorchSegmenterBase, MPOSegmenter):
+    """PyTorch segmentation (multiprocessing version)"""
+    @staticmethod
+    def segment_algorithm(image, *,
+                          model_file: str = None):
+        """
+        Parameters
+        ----------
+        image: 2d ndarray
+            event image
+        model_file: str
+            path to or name of a dcnum model file (.dcnm); if only a
+            name is provided, then the "torch_model_files" directory
+            paths are searched for the file name
+        Returns
+        -------
+        mask: 2d boolean or integer ndarray
+            mask or labeling image for the given event image
+        """
+        if model_file is None:
+            raise ValueError("Please specify a .dcnm model file!")
+        # Set number of pytorch threads to 1, because dcnum is doing
+        # all the multiprocessing.
+        # https://pytorch.org/docs/stable/generated/torch.set_num_threads.html#torch.set_num_threads
+        torch.set_num_threads(1)
+        device = torch.device("cpu")
+        # Load model and metadata
+        model, model_meta = load_model(model_file, device)
+        image_preproc = preprocess_images(image[np.newaxis, :, :],
+                                          **model_meta["preprocessing"])
+        image_ten = torch.from_numpy(image_preproc)
+        # Move image tensors to device
+        image_ten_on_device = image_ten.to(device)
+        # Model inference
+        pred_tensor = model(image_ten_on_device)
+        # Convert the tensor into a numpy mask array. The `pred_tensor`
+        # array is still of the shape (1, 1, H, W). The `masks`
+        # array is of shape (1, H, W). We can optionally label it
+        # here (we have to if the shapes don't match) or do it in
+        # postprocessing.
+        masks = pred_tensor.detach().cpu().numpy()[0] >= 0.5
+        # Perform postprocessing in cases where the image shapes don't match
+        assert len(masks[0].shape) == len(image.shape), "sanity check"
+        if masks[0].shape != image.shape:
+            labels = postprocess_masks(
+                masks=masks,
+                original_image_shape=image.shape,
+            )
+            return labels[0]
+        else:
+            return masks[0]

dcnum-0.23.1/src/dcnum/segm/segm_torch/segm_torch_sto.py ADDED Viewed

@@ -0,0 +1,88 @@
+from dcnum.segm import STOSegmenter
+import numpy as np
+import torch
+from .segm_torch_base import TorchSegmenterBase
+from .torch_model import load_model
+from .torch_preproc import preprocess_images
+from .torch_postproc import postprocess_masks
+class SegmentTorchSTO(TorchSegmenterBase, STOSegmenter):
+    """PyTorch segmentation (GPU version)"""
+    @staticmethod
+    def _segment_in_batches(imgs_t, model, batch_size, device):
+        """Segment image data in batches"""
+        size = len(imgs_t)
+        # Create empty array to fill up with segmented batches
+        masks = np.empty((len(imgs_t), *imgs_t[0].shape[-2:]),
+                         dtype=bool)
+        for start_idx in range(0, size, batch_size):
+            batch = imgs_t[start_idx:start_idx + batch_size]
+            # Move image tensors to cuda
+            batch = torch.tensor(batch, device=device)
+            # Model inference
+            batch_seg = model(batch)
+            # Remove extra dim [B, C, H, W] --> [B, H, W]
+            batch_seg = batch_seg.squeeze(1)
+            # Convert cuda-tensor into numpy arrays
+            batch_seg_np = batch_seg.detach().cpu().numpy()
+            # Fill empty array with segmented batch
+            masks[start_idx:start_idx + batch_size] = batch_seg_np >= 0.5
+        return masks
+    @staticmethod
+    def segment_algorithm(images, gpu_id=None, batch_size=50, *,
+                          model_file: str = None):
+        """
+        Parameters
+        ----------
+        images: 3d ndarray
+            array of N event images of shape (N, H, W)
+        gpu_id: str
+            optional argument specifying the GPU to use
+        batch_size: int
+            number of images to process in one batch
+        model_file: str
+            path to or name of a dcnum model file (.dcnm); if only a
+            name is provided, then the "torch_model_files" directory
+            paths are searched for the file name
+        Returns
+        -------
+        mask: 3d boolean or integer ndarray
+            mask or label images of shape (N, H, W)
+        """
+        if model_file is None:
+            raise ValueError("Please specify a model file!")
+        # Determine device to use
+        device = torch.device(gpu_id if gpu_id is not None else "cuda")
+        # Load model and metadata
+        model, model_meta = load_model(model_file, device)
+        # Preprocess the images
+        image_preproc = preprocess_images(images,
+                                          **model_meta["preprocessing"])
+        # Model inference
+        # The `masks` array has the shape (len(images), H, W), where
+        # H and W may be different from the corresponding axes in `images`.
+        masks = SegmentTorchSTO._segment_in_batches(image_preproc,
+                                                    model,
+                                                    batch_size,
+                                                    device
+                                                    )
+        # Perform postprocessing in cases where the image shapes don't match
+        assert len(masks.shape[1:]) == len(images.shape[1:]), "sanity check"
+        if masks.shape[1:] != images.shape[1:]:
+            labels = postprocess_masks(
+                masks=masks,
+                original_image_shape=images.shape[1:])
+            return labels
+        else:
+            return masks

dcnum-0.23.1/src/dcnum/segm/segm_torch/torch_model.py ADDED Viewed

@@ -0,0 +1,95 @@
+import errno
+import functools
+import hashlib
+import json
+import logging
+import os
+import pathlib
+import torch
+from ...meta import paths
+logger = logging.getLogger(__name__)
+def check_md5sum(path):
+    """Verify that the file stem ends with the first five MD5 hex digits"""
+    md5 = hashlib.md5(path.read_bytes()).hexdigest()
+    if md5[:5] != path.stem.split("_")[-1]:
+        raise ValueError(f"MD5 mismatch for {path} ({md5})! Expected the "
+                         f"input file to end with '{md5[:5]}{path.suffix}'.")
+@functools.cache
+def load_model(path_or_name, device):
+    """Load a PyTorch model + metadata from a TorchScript jit checkpoint
+    Parameters
+    ----------
+    path_or_name: str or pathlib.Path
+        jit checkpoint file; For dcnum, these files have the suffix .dcnm
+        and contain a special `_extra_files["dcnum_meta.json"]` extra
+        file that can be loaded via `torch.jit.load` (see below).
+    device: str or torch.device
+        device on which to run the model
+    Returns
+    -------
+    model_jit: torch.jit.ScriptModule
+        loaded PyTorch model stored as a TorchScript module
+    model_meta: dict
+        metadata associated with the loaded model
+    """
+    model_path = retrieve_model_file(path_or_name)
+    # define an extra files mapping dictionary that loads the model's metadata
+    extra_files = {"dcnum_meta.json": ""}
+    # load model
+    model_jit = torch.jit.load(model_path,
+                               _extra_files=extra_files,
+                               map_location=device)
+    # load model metadata
+    model_meta = json.loads(extra_files["dcnum_meta.json"])
+    # set model to evaluation mode
+    model_jit.eval()
+    # optimize for inference on device
+    model_jit = torch.jit.optimize_for_inference(model_jit)
+    return model_jit, model_meta
+@functools.cache
+def retrieve_model_file(path_or_name):
+    """Retrieve a dcnum torch model file
+    If a path to a model is given, then this path is returned directly.
+    If a file name is given, then look for the file with
+    :func:`dcnum.meta.paths.find_file` using the "torch_model_files"
+    topic.
+    """
+    # Did the user already pass a path?
+    if isinstance(path_or_name, pathlib.Path):
+        if path_or_name.exists():
+            path = path_or_name
+        else:
+            try:
+                return retrieve_model_file(path_or_name.name)
+            except BaseException:
+                raise FileNotFoundError(errno.ENOENT,
+                                        os.strerror(errno.ENOENT),
+                                        str(path_or_name))
+    elif isinstance(path_or_name, str):
+        name = path_or_name.strip()
+        # We now have a string for a filename, and we have to figure out what
+        # the path is. There are several options, including cached files.
+        if pathlib.Path(name).exists():
+            path = pathlib.Path(name)
+        else:
+            path = paths.find_file("torch_model_files", name)
+    else:
+        raise ValueError(
+            f"Please pass a string or a path, got {type(path_or_name)}!")
+    logger.info(f"Found dcnum model file {path}")
+    check_md5sum(path)
+    return path

dcnum 0.22.1__tar.gz → 0.23.1__tar.gz

Potentially problematic release.

dcnum 0.22.1__tar.gz → 0.23.1__tar.gz