PyPI - dcnum - Versions diffs - 0.22.0__py3-none-any.whl → 0.23.0__py3-none-any.whl - Mend

dcnum 0.22.0py3-none-any.whl → 0.23.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dcnum might be problematic. Click here for more details.

Files changed (20) hide show

dcnum/_version.py +2 -2
dcnum/feat/feat_background/base.py +1 -1
dcnum/feat/feat_texture/tex_all.py +28 -1
dcnum/feat/gate.py +2 -2
dcnum/feat/queue_event_extractor.py +1 -1
dcnum/logic/ctrl.py +14 -4
dcnum/segm/__init__.py +4 -1
dcnum/segm/segm_torch/__init__.py +19 -0
dcnum/segm/segm_torch/segm_torch_base.py +125 -0
dcnum/segm/segm_torch/segm_torch_mpo.py +71 -0
dcnum/segm/segm_torch/segm_torch_sto.py +88 -0
dcnum/segm/segm_torch/torch_model.py +95 -0
dcnum/segm/segm_torch/torch_postproc.py +93 -0
dcnum/segm/segm_torch/torch_preproc.py +109 -0
dcnum/segm/segmenter.py +41 -1
{dcnum-0.22.0.dist-info → dcnum-0.23.0.dist-info}/METADATA +4 -2
{dcnum-0.22.0.dist-info → dcnum-0.23.0.dist-info}/RECORD +20 -13
{dcnum-0.22.0.dist-info → dcnum-0.23.0.dist-info}/LICENSE +0 -0
{dcnum-0.22.0.dist-info → dcnum-0.23.0.dist-info}/WHEEL +0 -0
{dcnum-0.22.0.dist-info → dcnum-0.23.0.dist-info}/top_level.txt +0 -0

dcnum/_version.py CHANGED Viewed

@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.22.0'
-__version_tuple__ = version_tuple = (0, 22, 0)
+__version__ = version = '0.23.0'
+__version_tuple__ = version_tuple = (0, 23, 0)

dcnum/feat/feat_background/base.py CHANGED Viewed

@@ -130,7 +130,7 @@ class Background(abc.ABC):
         """Return a unique background pipeline identifier
         The pipeline identifier is universally applicable and must
-        be backwards-compatible (future versions of dcevent will
+        be backwards-compatible (future versions of dcnum will
         correctly acknowledge the ID).
         The background pipeline ID is defined as::

dcnum/feat/feat_texture/tex_all.py CHANGED Viewed

@@ -6,6 +6,34 @@ from .common import haralick_names
 def haralick_texture_features(
         mask, image=None, image_bg=None, image_corr=None):
+    """Compute Haralick texture features
+    The following texture features are excluded
+    - feature 6 "Sum Average", which is equivalent to `2 * bright_bc_avg`
+      since dclab 0.44.0
+    - feature 10 "Difference Variance", because it has a functional
+      dependency on the offset value and since we do background correction,
+      we are not interested in it
+    - feature 14, because nobody is using it, it is not understood by
+      everyone what it actually is, and it is computationally expensive.
+    This leaves us with the following 11 texture features (22 if you count
+    avg and ptp):
+    https://earlglynn.github.io/RNotes/package/EBImage/Haralick-Textural-Features.html
+    - 1. `tex_asm`: (1) Angular Second Moment
+    - 2. `tex_con`: (2) Contrast
+    - 3. `tex_cor`: (3) Correlation
+    - 4. `tex_var`: (4) Variance
+    - 5. `tex_idm`: (5) Inverse Difference Moment
+    - 6. `tex_sva`: (7) Sum Variance
+    - 7. `tex_sen`: (8) Sum Entropy
+    - 8. `tex_ent`: (9) Entropy
+    - 9. `tex_den`: (11) Difference Entropy
+    - 10. `tex_f12`: (12) Information Measure of Correlation 1
+    - 11. `tex_f13`: (13) Information Measure of Correlation 2
+    """
     # make sure we have a boolean array
     mask = np.array(mask, dtype=bool)
     size = mask.shape[0]
@@ -22,7 +50,6 @@ def haralick_texture_features(
     for ii in range(size):
         # Haralick texture features
-        # https://gitlab.gwdg.de/blood_data_analysis/dcevent/-/issues/20
         # Preprocessing:
         # - create a copy of the array (don't edit `image_corr`)
         # - add grayscale values (negative values not supported)

dcnum/feat/gate.py CHANGED Viewed

@@ -20,7 +20,7 @@ class Gate:
         Parameters
         ----------
         data: .HDF5Data
-            dcevent data instance
+            dcnum data instance
         online_gates: bool
             set to True to enable gating with "online" gates stored
             in the input file; online gates are applied in real-time
@@ -95,7 +95,7 @@ class Gate:
         """Return a unique gating pipeline identifier
         The pipeline identifier is universally applicable and must
-        be backwards-compatible (future versions of dcevent will
+        be backwards-compatible (future versions of dcnum will
         correctly acknowledge the ID).
         The gating pipeline ID is defined as::

dcnum/feat/queue_event_extractor.py CHANGED Viewed

@@ -266,7 +266,7 @@ class QueueEventExtractor:
         """Return a unique feature extractor pipeline identifier
         The pipeline identifier is universally applicable and must
-        be backwards-compatible (future versions of dcevent will
+        be backwards-compatible (future versions of dcnum will
         correctly acknowledge the ID).
         The feature extractor pipeline ID is defined as::

dcnum/logic/ctrl.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import collections
 import datetime
+import hashlib
 import json
 import logging
 from logging.handlers import QueueListener
@@ -338,7 +339,8 @@ class DCNumJobRunner(threading.Thread):
         # hash sanity check above, check the generation, input data,
         # and background pipeline identifiers.
         redo_bg = (
-            (datdict["gen_id"] != self.ppdict["gen_id"])
+            "image_bg" not in self.draw
+            or (datdict["gen_id"] != self.ppdict["gen_id"])
             or (datdict["dat_id"] != self.ppdict["dat_id"])
             or (datdict["bg_id"] != self.ppdict["bg_id"]))
@@ -461,9 +463,17 @@ class DCNumJobRunner(threading.Thread):
                 # This is the identifier appendix that we use to identify this
                 # dataset. Note that we only override the run identifier when
                 # segmentation did actually take place.
-                mid_ap = "dcn-" + self.pphash[:7]
-                # This is the current measurement identifier (may be empty).
-                mid_cur = hw.h5.attrs.get("experiment:run identifier", "")
+                mid_ap = f"dcn-{self.pphash[:7]}"
+                # This is the current measurement identifier
+                mid_cur = hw.h5.attrs.get("experiment:run identifier")
+                if not mid_cur:
+                    # Compute a measurement identifier from the metadata
+                    m_time = hw.h5.attrs.get("experiment:time", "none")
+                    m_date = hw.h5.attrs.get("experiment:date", "none")
+                    m_sid = hw.h5.attrs.get("setup:identifier", "none")
+                    hasher = hashlib.md5(
+                        f"{m_time}_{m_date}_{m_sid}".encode("utf-8"))
+                    mid_cur = str(uuid.UUID(hex=hasher.hexdigest()))
                 # The new measurement identifier is a combination of both.
                 mid_new = f"{mid_cur}_{mid_ap}" if mid_cur else mid_ap
                 hw.h5.attrs["experiment:run identifier"] = mid_new

dcnum/segm/__init__.py CHANGED Viewed

@@ -1,6 +1,9 @@
 # flake8: noqa: F401
-from .segmenter import Segmenter, get_available_segmenters
+from .segmenter import (
+    Segmenter, SegmenterNotApplicableError, get_available_segmenters
+)
 from .segmenter_mpo import MPOSegmenter
 from .segmenter_sto import STOSegmenter
 from .segmenter_manager_thread import SegmenterManagerThread
 from . import segm_thresh
+from . import segm_torch

dcnum/segm/segm_torch/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+import importlib
+try:
+    torch = importlib.import_module("torch")
+    req_maj = 2
+    req_min = 3
+    ver_tuple = torch.__version__.split(".")
+    act_maj = int(ver_tuple[0])
+    act_min = int(ver_tuple[1])
+    if act_maj < req_maj or (act_maj == req_maj and act_min < req_min):
+        raise ValueError(f"Your PyTorch version {act_maj}.{act_min} is not "
+                         f"supported, please update to at least "
+                         f"{req_maj}.{req_min}")
+except ImportError:
+    pass
+else:
+    from .segm_torch_mpo import SegmentTorchMPO  # noqa: F401
+    if torch.cuda.is_available():
+        from .segm_torch_sto import SegmentTorchSTO  # noqa: F401

dcnum/segm/segm_torch/segm_torch_base.py ADDED Viewed

@@ -0,0 +1,125 @@
+import functools
+import pathlib
+import re
+from typing import Dict
+from ...meta import paths
+from ..segmenter import Segmenter, SegmenterNotApplicableError
+from .torch_model import load_model
+class TorchSegmenterBase(Segmenter):
+    """Torch segmenters that use a pretrained model for segmentation"""
+    requires_background_correction = False
+    mask_postprocessing = True
+    mask_default_kwargs = {
+        "clear_border": True,
+        "fill_holes": True,
+        "closing_disk": 0,
+    }
+    @classmethod
+    def get_ppid_from_ppkw(cls, kwargs, kwargs_mask=None):
+        kwargs_new = kwargs.copy()
+        # Make sure that the `model_file` kwarg is actually just a filename
+        # so that the pipeline identifier only contains the name, but not
+        # the full path.
+        if "model_file" in kwargs:
+            model_file = kwargs["model_file"]
+            mpath = pathlib.Path(model_file)
+            if mpath.exists():
+                # register the location of the file in the search path
+                # registry so other threads/processes will find it.
+                paths.register_search_path("torch_model_files", mpath.parent)
+                kwargs_new["model_file"] = mpath.name
+        return super(TorchSegmenterBase, cls).get_ppid_from_ppkw(kwargs_new,
+                                                                 kwargs_mask)
+    @classmethod
+    def validate_applicability(cls,
+                               segmenter_kwargs: Dict,
+                               meta: Dict = None,
+                               logs: Dict = None):
+        """Validate the applicability of this segmenter for a dataset
+        The applicability is defined by the metadata in the segmentation
+        model.
+        Parameters
+        ----------
+        segmenter_kwargs: dict
+            Keyword arguments for the segmenter
+        meta: dict
+            Dictionary of metadata from an :class:`HDF5Data` instance
+        logs: dict
+            Dictionary of logs from an :class:`HDF5Data` instance
+        Returns
+        -------
+        applicable: bool
+            True if the segmenter is applicable to the dataset
+        Raises
+        ------
+        SegmenterNotApplicableError
+            If the segmenter is not applicable to the dataset
+        """
+        if "model_file" not in segmenter_kwargs:
+            raise ValueError("A `model_file` must be provided in the "
+                             "`segmenter_kwargs` to validate applicability")
+        model_file = segmenter_kwargs["model_file"]
+        _, model_meta = load_model(model_file, device="cpu")
+        reasons_list = []
+        validators = {
+            "meta": functools.partial(
+                cls._validate_applicability_item,
+                data_dict=meta,
+                reasons_list=reasons_list),
+            "logs": functools.partial(
+                cls._validate_applicability_item,
+                # convert logs to strings
+                data_dict={key: "\n".join(val) for key, val in logs.items()},
+                reasons_list=reasons_list)
+        }
+        for item in model_meta.get("validation", []):
+            it = item["type"]
+            if it in validators:
+                validators[it](item)
+            else:
+                reasons_list.append(
+                    f"invalid validation type {it} in {model_file}")
+        if reasons_list:
+            raise SegmenterNotApplicableError(segmenter_class=cls,
+                                              reasons_list=reasons_list)
+        return True
+    @staticmethod
+    def _validate_applicability_item(item, data_dict, reasons_list):
+        """Populate `reasons_list` with invalid entries
+        Example `item`::
+            {"type": "meta",
+             "key": "setup:region",
+             "allow-missing-key": False,
+             "regexp": "^channel$",
+             "regexp-negate": False,
+             "reason": "only channel region supported",
+             }
+        """
+        key = item["key"]
+        if key in data_dict:
+            regexp = re.compile(item["regexp"])
+            matched = bool(regexp.match(data_dict[key]))
+            negate = item.get("regexp-negate", False)
+            valid = matched if not negate else not matched
+            if not valid:
+                reasons_list.append(item.get("reason", "unknown reason"))
+        elif not item.get("allow-missing-key", False):
+            reasons_list.append(f"Key '{key}' missing in {item['type']}")

dcnum/segm/segm_torch/segm_torch_mpo.py ADDED Viewed

@@ -0,0 +1,71 @@
+import numpy as np
+import torch
+from ..segmenter_mpo import MPOSegmenter
+from .segm_torch_base import TorchSegmenterBase
+from .torch_model import load_model
+from .torch_preproc import preprocess_images
+from .torch_postproc import postprocess_masks
+class SegmentTorchMPO(TorchSegmenterBase, MPOSegmenter):
+    """PyTorch segmentation (multiprocessing version)"""
+    @staticmethod
+    def segment_algorithm(image, *,
+                          model_file: str = None):
+        """
+        Parameters
+        ----------
+        image: 2d ndarray
+            event image
+        model_file: str
+            path to or name of a dcnum model file (.dcnm); if only a
+            name is provided, then the "torch_model_files" directory
+            paths are searched for the file name
+        Returns
+        -------
+        mask: 2d boolean or integer ndarray
+            mask or labeling image for the given image
+        """
+        if model_file is None:
+            raise ValueError("Please specify a .dcnm model file!")
+        # Set number of pytorch threads to 1, because dcnum is doing
+        # all the multiprocessing.
+        # https://pytorch.org/docs/stable/generated/torch.set_num_threads.html#torch.set_num_threads
+        torch.set_num_threads(1)
+        device = torch.device("cpu")
+        # Load model and metadata
+        model, model_meta = load_model(model_file, device)
+        image_preproc = preprocess_images(image[np.newaxis, :, :],
+                                          **model_meta["preprocessing"])
+        image_ten = torch.from_numpy(image_preproc)
+        # Move image tensors to device
+        image_ten_on_device = image_ten.to(device)
+        # Model inference
+        pred_tensor = model(image_ten_on_device)
+        # Convert the tensor into a numpy mask array. The `pred_tensor`
+        # array is still of the shape (1, 1, H, W). The `masks`
+        # array is of shape (1, H, W). We can optionally label it
+        # here (we have to if the shapes don't match) or do it in
+        # postprocessing.
+        masks = pred_tensor.detach().cpu().numpy()[0] >= 0.5
+        # Perform postprocessing in cases where the image shapes don't match
+        assert len(masks[0].shape) == len(image.shape), "sanity check"
+        if masks[0].shape != image.shape:
+            labels = postprocess_masks(
+                masks=masks,
+                original_image_shape=image.shape,
+            )
+            return labels[0]
+        else:
+            return masks[0]

dcnum/segm/segm_torch/segm_torch_sto.py ADDED Viewed

@@ -0,0 +1,88 @@
+from dcnum.segm import STOSegmenter
+import numpy as np
+import torch
+from .segm_torch_base import TorchSegmenterBase
+from .torch_model import load_model
+from .torch_preproc import preprocess_images
+from .torch_postproc import postprocess_masks
+class SegmentTorchSTO(TorchSegmenterBase, STOSegmenter):
+    """PyTorch segmentation (GPU version)"""
+    @staticmethod
+    def _segment_in_batches(imgs_t, model, batch_size, device):
+        """Segment image data in batches"""
+        size = len(imgs_t)
+        # Create empty array to fill up with segmented batches
+        masks = np.empty((len(imgs_t), *imgs_t[0].shape[-2:]),
+                         dtype=bool)
+        for start_idx in range(0, size, batch_size):
+            batch = imgs_t[start_idx:start_idx + batch_size]
+            # Move image tensors to cuda
+            batch = torch.tensor(batch, device=device)
+            # Model inference
+            batch_seg = model(batch)
+            # Remove extra dim [B, C, H, W] --> [B, H, W]
+            batch_seg = batch_seg.squeeze(1)
+            # Convert cuda-tensor into numpy arrays
+            batch_seg_np = batch_seg.detach().cpu().numpy()
+            # Fill empty array with segmented batch
+            masks[start_idx:start_idx + batch_size] = batch_seg_np >= 0.5
+        return masks
+    @staticmethod
+    def segment_algorithm(images, gpu_id=None, batch_size=50, *,
+                          model_file: str = None):
+        """
+        Parameters
+        ----------
+        images: 3d ndarray
+            array of N event images of shape (N, H, W)
+        gpu_id: str
+            optional argument specifying the GPU to use
+        batch_size: int
+            number of images to process in one batch
+        model_file: str
+            path to or name of a dcnum model file (.dcnm); if only a
+            name is provided, then the "torch_model_files" directory
+            paths are searched for the file name
+        Returns
+        -------
+        mask: 3d boolean or integer ndarray
+            mask or label images of shape (N, H, W)
+        """
+        if model_file is None:
+            raise ValueError("Please specify a model file!")
+        # Determine device to use
+        device = torch.device(gpu_id if gpu_id is not None else "cuda")
+        # Load model and metadata
+        model, model_meta = load_model(model_file, device)
+        # Preprocess the images
+        image_preproc = preprocess_images(images,
+                                          **model_meta["preprocessing"])
+        # Model inference
+        # The `masks` array has the shape (len(images), H, W), where
+        # H and W may be different from the corresponding axes in `images`.
+        masks = SegmentTorchSTO._segment_in_batches(image_preproc,
+                                                    model,
+                                                    batch_size,
+                                                    device
+                                                    )
+        # Perform postprocessing in cases where the image shapes don't match
+        assert len(masks.shape[1:]) == len(images.shape[1:]), "sanity check"
+        if masks.shape[1:] != images.shape[1:]:
+            labels = postprocess_masks(
+                masks=masks,
+                original_image_shape=images.shape[1:])
+            return labels
+        else:
+            return masks

dcnum/segm/segm_torch/torch_model.py ADDED Viewed

@@ -0,0 +1,95 @@
+import errno
+import functools
+import hashlib
+import json
+import logging
+import os
+import pathlib
+import torch
+from ...meta import paths
+logger = logging.getLogger(__name__)
+def check_md5sum(path):
+    """Verify the last five characters of the file stem with its MD5 hash"""
+    md5 = hashlib.md5(path.read_bytes()).hexdigest()
+    if md5[:5] != path.stem.split("_")[-1]:
+        raise ValueError(f"MD5 mismatch for {path} ({md5})! Expected the "
+                         f"input file to end with '{md5[:5]}{path.suffix}'.")
+@functools.cache
+def load_model(path_or_name, device):
+    """Load a PyTorch model + metadata from a TorchScript jit checkpoint
+    Parameters
+    ----------
+    path_or_name: str or pathlib.Path
+        jit checkpoint file; For dcnum, these files have the suffix .dcnm
+        and contain a special `_extra_files["dcnum_meta.json"]` extra
+        file that can be loaded via `torch.jit.load` (see below).
+    device: str or torch.device
+        device on which to run the model
+    Returns
+    -------
+    model_jit: torch.jit.ScriptModule
+        loaded PyTorch model stored as a TorchScript module
+    model_meta: dict
+        metadata associated with the loaded model
+    """
+    model_path = retrieve_model_file(path_or_name)
+    # define an extra files mapping dictionary that loads the model's metadata
+    extra_files = {"dcnum_meta.json": ""}
+    # load model
+    model_jit = torch.jit.load(model_path,
+                               _extra_files=extra_files,
+                               map_location=device)
+    # load model metadata
+    model_meta = json.loads(extra_files["dcnum_meta.json"])
+    # set model to evaluation mode
+    model_jit.eval()
+    # optimize for inference on device
+    model_jit = torch.jit.optimize_for_inference(model_jit)
+    return model_jit, model_meta
+@functools.cache
+def retrieve_model_file(path_or_name):
+    """Retrieve a dcnum torch model file
+    If a path to a model is given, then this path is returned directly.
+    If a file name is given, then look for the file with
+    :func:`dcnum.meta.paths.find_file` using the "torch_model_files"
+    topic.
+    """
+    # Did the user already pass a path?
+    if isinstance(path_or_name, pathlib.Path):
+        if path_or_name.exists():
+            path = path_or_name
+        else:
+            try:
+                return retrieve_model_file(path_or_name.name)
+            except BaseException:
+                raise FileNotFoundError(errno.ENOENT,
+                                        os.strerror(errno.ENOENT),
+                                        str(path_or_name))
+    elif isinstance(path_or_name, str):
+        name = path_or_name.strip()
+        # We now have a string for a filename, and we have to figure out what
+        # the path is. There are several options, including cached files.
+        if pathlib.Path(name).exists():
+            path = pathlib.Path(name)
+        else:
+            path = paths.find_file("torch_model_files", name)
+    else:
+        raise ValueError(
+            f"Please pass a string or a path, got {type(path_or_name)}!")
+    logger.info(f"Found dcnum model file {path}")
+    check_md5sum(path)
+    return path

dcnum/segm/segm_torch/torch_postproc.py ADDED Viewed

@@ -0,0 +1,93 @@
+from typing import Tuple
+from ..segmenter import Segmenter
+import numpy as np
+from scipy import ndimage as ndi
+def postprocess_masks(masks,
+                      original_image_shape: Tuple[int, int]):
+    """Postprocess mask images from ML segmenters
+    The transformation includes:
+    - Revert the cropping and padding operations done in
+      :func:`.preprocess_images` by padding with zeros and cropping.
+    - If the original image shape is larger than the mask image shape,
+      also clear borders in an intermediate step
+      (mask postprocessing using :func:`Segmenter.process_mask`).
+    Parameters
+    ----------
+    masks: 3d or 4d ndarray
+        Mask data in shape (batch_size, 1, imagex_size, imagey_size)
+        or (batch_size, imagex_size, imagey_size).
+    original_image_shape: tuple of (int, int)
+        The required output mask shape for one event. This is required for
+        doing the inverse of what is done in :func:`.preprocess_images`.
+    Returns
+    -------
+    labels_proc: np.ndarray
+        An integer array with the same dimensions as the original image
+        data passed to :func:`.preprocess_images`. The shape of this array
+        is (batch_size, original_image_shape[0], original_image_shape[1]).
+    """
+    # If output of model is 4d, remove channel axis
+    if len(masks.shape) == 4:
+        masks = masks[:, 0, :, :]
+    # Label the mask image
+    labels = np.empty(masks.shape, dtype=np.uint16)
+    label_struct = ndi.generate_binary_structure(2, 2)
+    for ii in range(masks.shape[0]):
+        ndi.label(
+            input=masks[ii],
+            output=labels[ii],
+            structure=label_struct)
+    batch_size = labels.shape[0]
+    # Revert padding and cropping from preprocessing
+    mask_shape_ret = labels.shape[1:]
+    # height
+    s0diff = original_image_shape[0] - mask_shape_ret[0]
+    s0t = abs(s0diff) // 2
+    s0b = abs(s0diff) - s0t
+    # width
+    s1diff = original_image_shape[1] - mask_shape_ret[1]
+    s1l = abs(s1diff) // 2
+    s1r = abs(s1diff) - s1l
+    if s0diff > 0 or s1diff > 0:
+        # The masks that we have must be padded. Before we do that, we have
+        # to remove events on the edges, otherwise we will have half-segmented
+        # cell events in the output array.
+        for ii in range(batch_size):
+            labels[ii] = Segmenter.process_mask(labels[ii],
+                                                clear_border=True,
+                                                fill_holes=False,
+                                                closing_disk=0)
+    # Crop first, only then pad.
+    if s1diff > 0:
+        labels_pad = np.zeros((batch_size,
+                              labels.shape[1],
+                              original_image_shape[1]),
+                              dtype=np.uint16)
+        labels_pad[:, :, s1l:-s1r] = labels
+        labels = labels_pad
+    elif s1diff < 0:
+        labels = labels[:, :, s1l:-s1r]
+    if s0diff > 0:
+        labels_pad = np.zeros((batch_size,
+                              original_image_shape[0],
+                              original_image_shape[1]),
+                              dtype=np.uint16)
+        labels_pad[:, s0t:-s0b, :] = labels
+        labels = labels_pad
+    elif s0diff < 0:
+        labels = labels[:, s0t:-s0b, :]
+    return labels

dcnum/segm/segm_torch/torch_preproc.py ADDED Viewed

@@ -0,0 +1,109 @@
+from typing import Tuple
+import numpy as np
+def preprocess_images(images: np.ndarray,
+                      norm_mean: float,
+                      norm_std: float,
+                      image_shape: Tuple[int, int] = None,
+                      ):
+    """Transform image data to something torch models expect
+    The transformation includes:
+    - normalization (division by 255, subtraction of mean, division by std)
+    - cropping and padding of the input images to `image_shape`. For padding,
+      the median of each *individual* image is used.
+    - casting the input images to four dimensions
+      (batch_size, 1, height, width) where the second axis is "channels"
+    Parameters
+    ----------
+    images:
+        Input image array (batch_size, height_in, width_in). If this is a
+        2D image, it will be reshaped to a 3D image with a batch_size of 1.
+    norm_mean:
+        Mean value used for standard score data normalization, i.e.
+        `normalized = (images / 255 - norm_mean) / norm_std`
+    norm_std:
+        Standard deviation used for standard score data normalization
+        (see above)
+    image_shape
+        Image shape for which the model was created (height, width).
+        If the image shape does not match the input image shape, then
+        the input images are padded/cropped to fit the image shape of
+        the model.
+    Returns
+    -------
+    image_proc:
+        4D array with preprocessed image data of shape
+        (batch_size, 1, height, width)
+    """
+    if len(images.shape) == 2:
+        # Insert indexing axis (batch dimension)
+        images = images[np.newaxis, :, :]
+    batch_size = images.shape[0]
+    # crop and pad the images based on what the model expects
+    image_shape_act = images.shape[1:]
+    if image_shape is None:
+        # model fits perfectly to input data
+        image_shape = image_shape_act
+    # height
+    hdiff = image_shape_act[0] - image_shape[0]
+    ht = abs(hdiff) // 2
+    hb = abs(hdiff) - ht
+    # width
+    wdiff = image_shape_act[1] - image_shape[1]
+    wl = abs(wdiff) // 2
+    wr = abs(wdiff) - wl
+    # helper variables
+    wpad = wdiff < 0
+    wcrp = wdiff > 0
+    hpad = hdiff < 0
+    hcrp = hdiff > 0
+    # The easy part is the cropping
+    if hcrp or wcrp:
+        # define slices for width and height
+        slice_hc = slice(ht, -hb) if hcrp else slice(None, None)
+        slice_wc = slice(wl, -wr) if wcrp else slice(None, None)
+        img_proc = images[:, slice_hc, slice_wc]
+    else:
+        img_proc = images
+    # The hard part is the padding
+    if hpad or wpad:
+        # compute median for each original input image
+        img_med = np.median(images, axis=(1, 2))
+        # broadcast the median array from 1D to 3D
+        img_med = img_med[:, None, None]
+        # define slices for width and height
+        slice_hp = slice(ht, -hb) if hpad else slice(None, None)
+        slice_wp = slice(wl, -wr) if wpad else slice(None, None)
+        # empty padding image stack with the shape required for the model
+        img_pad = np.empty(shape=(batch_size, image_shape[0], image_shape[1]),
+                           dtype=np.float32)
+        # fill in original data
+        img_pad[:, slice_hp, slice_wp] = img_proc
+        # fill in background data for height
+        if hpad:
+            img_pad[:, :ht, :] = img_med
+            img_pad[:, -hb:, :] = img_med
+        # fill in background data for width
+        if wpad:
+            img_pad[:, :, :wl] = img_med
+            img_pad[:, :, -wr:] = img_med
+        # Replace img_proc with the padded image stack
+        img_proc = img_pad
+    # normalize images
+    img_norm = (img_proc.astype(np.float32) / 255 - norm_mean) / norm_std
+    # Add a "channels" axis for the ML models.
+    return img_norm[:, np.newaxis, :, :]

dcnum/segm/segmenter.py CHANGED Viewed

@@ -13,6 +13,18 @@ from skimage import morphology
 from ..meta.ppid import kwargs_to_ppid, ppid_to_kwargs
+class SegmenterNotApplicableError(BaseException):
+    """Used to indicate when a dataset cannot be segmented with a segmenter"""
+    def __init__(self, segmenter_class, reasons_list):
+        super(SegmenterNotApplicableError, self).__init__(
+            f"The dataset cannot be segmented with the "
+            f"'{segmenter_class.get_ppid_code()}' segmenter: "
+            f"{', '.join(reasons_list)}"
+        )
+        self.reasons_list = reasons_list
+        self.segmenter_class = segmenter_class
 class Segmenter(abc.ABC):
     #: Required hardware ("cpu" or "gpu") defined in first-level subclass.
     hardware_processor = "none"
@@ -88,7 +100,7 @@ class Segmenter(abc.ABC):
         """Return a unique segmentation pipeline identifier
         The pipeline identifier is universally applicable and must
-        be backwards-compatible (future versions of dcevent will
+        be backwards-compatible (future versions of dcnum will
         correctly acknowledge the ID).
         The segmenter pipeline ID is defined as::
@@ -340,6 +352,34 @@ class Segmenter(abc.ABC):
         This is implemented in the MPO and STO segmenters.
         """
+    @classmethod
+    def validate_applicability(cls,
+                               segmenter_kwargs: Dict,
+                               meta: Dict = None,
+                               logs: Dict = None):
+        """Validate the applicability of this segmenter for a dataset
+        Parameters
+        ----------
+        segmenter_kwargs: dict
+            Keyword arguments for the segmenter
+        meta: dict
+            Dictionary of metadata from an :class:`HDF5Data` instance
+        logs: dict
+            Dictionary of logs from an :class:`HDF5Data` instance
+        Returns
+        -------
+        applicable: bool
+            True if the segmenter is applicable to the dataset
+        Raises
+        ------
+        SegmenterNotApplicableError
+            If the segmenter is not applicable to the dataset
+        """
+        return True
 @functools.cache
 def get_available_segmenters():

{dcnum-0.22.0.dist-info → dcnum-0.23.0.dist-info}/METADATA RENAMED Viewed

@@ -1,8 +1,8 @@
 Metadata-Version: 2.1
 Name: dcnum
-Version: 0.22.0
+Version: 0.23.0
 Summary: numerics toolbox for imaging deformability cytometry
-Author: Maximilian Schlögel, Paul Müller
+Author: Maximilian Schlögel, Paul Müller, Raghava Alajangi
 Maintainer-email: Paul Müller <dev@craban.de>
 License: MIT
 Project-URL: source, https://github.com/DC-Analysis/dcnum
@@ -25,6 +25,8 @@ Requires-Dist: numpy >=1.21
 Requires-Dist: opencv-python-headless
 Requires-Dist: scikit-image
 Requires-Dist: scipy >=1.8.0
+Provides-Extra: torch
+Requires-Dist: torch >=2.3 ; extra == 'torch'
 |dcnum|
 =======

{dcnum-0.22.0.dist-info → dcnum-0.23.0.dist-info}/RECORD RENAMED Viewed

@@ -1,11 +1,11 @@
 dcnum/__init__.py,sha256=hcawIKS7utYiOyVhOAX9t7K3xYzP1b9862VV0b6qSrQ,74
-dcnum/_version.py,sha256=U42NllCG9uy3HhEJGou-86_Q3CYkSAexz4DzViMN24w,413
+dcnum/_version.py,sha256=T70sCooCIGWJ8Xde9WLPaxYPJPHtQXVuIM1Xp42tyqE,413
 dcnum/feat/__init__.py,sha256=jUJYWTD3VIoDNKrmryXbjHb1rGwYtK4b7VPWihYgUoo,325
 dcnum/feat/event_extractor_manager_thread.py,sha256=5HdCQCywyQ5QC56AMjSqCroqif9oOFyiSFWCe07JojM,7820
-dcnum/feat/gate.py,sha256=svbObmqpYdqPawpfrsEjTiUPJXf24GrNi8PXTKT-z44,7225
-dcnum/feat/queue_event_extractor.py,sha256=bNdYzMPto37FCIgBbBw-YRQ2TlTpJKCWj9r_Y4sak3E,15700
+dcnum/feat/gate.py,sha256=Yhxq80JoRMmQzBxl35C8NT91c9QcmQa-EIKLuxK6WvE,7221
+dcnum/feat/queue_event_extractor.py,sha256=0ncTQleT1sfc98zYkFuZWxU-akecfTrW6-OOU3z-d8o,15698
 dcnum/feat/feat_background/__init__.py,sha256=OTmMuazHNaSrZb2XW4cnJ6PlgJLbKrPbaidpEixYa0A,341
-dcnum/feat/feat_background/base.py,sha256=phZdyOrHQPjvYlw1JQ8DkdXw5H2-eE1LfLGqCAo1rlo,7965
+dcnum/feat/feat_background/base.py,sha256=A-K3qlJ0ABFBGm5eMKYcNCC7ktFAInSm0eR3N3DHQZY,7963
 dcnum/feat/feat_background/bg_copy.py,sha256=PK8x4_Uph-_A6uszZC5uhe1gD1dSRdHnDMEsN0HSGHA,1034
 dcnum/feat/feat_background/bg_roll_median.py,sha256=EyjstMDXFBYuJB1lN6g4Uw7tPm434X3hXQxKSqvcoJ4,13175
 dcnum/feat/feat_background/bg_sparse_median.py,sha256=ab7Boj7cmr6PBdTbyWTj_yNNJSfuowr7u-iSGW989WI,20709
@@ -18,9 +18,9 @@ dcnum/feat/feat_contour/moments.py,sha256=W8sD2X7JqIBq-9nL82hf4Hm2uJkfca8EvAl_hq
 dcnum/feat/feat_contour/volume.py,sha256=xVHWtv6USUHJZ5dM1Ur7fI7OwoPT5N2Ps0gKVWylfl8,6639
 dcnum/feat/feat_texture/__init__.py,sha256=6StM9S540UVtdFFR3bHa7nfCTomeVdoo7Uy9CjuTgH0,137
 dcnum/feat/feat_texture/common.py,sha256=COXHpXS-7DMouGu3WF83I76L02Sr7P9re4lxajh6g0E,439
-dcnum/feat/feat_texture/tex_all.py,sha256=eGjjNfPpfZw7FA_VNFCIMiU38KD0qcGbxLciYy-tCiA,4097
+dcnum/feat/feat_texture/tex_all.py,sha256=_5H3sXYRN0Uq2eUHn3XUyEHkU_tncEqbqJTC-HZcnGY,5198
 dcnum/logic/__init__.py,sha256=7J3GrwJInNQbrLk61HRIV7X7p69TAIbMYpR34hh6u14,177
-dcnum/logic/ctrl.py,sha256=th9xKVqXtmscCteU6Vum3wZb-H2RSyKL5kNpCDEXrlU,34792
+dcnum/logic/ctrl.py,sha256=FyVlizHOIaIGMqINvM-outPywAQU0-5NM7t1dEDml4c,35332
 dcnum/logic/job.py,sha256=H1uDZ1nnNHNWWCe6mS8OWB0Uxc6XUKLISx5xExeplZY,7015
 dcnum/logic/json_encoder.py,sha256=cxMnqisbKEVf-rVcw6rK2BBAb6iz_hKFaGl81kK36lQ,571
 dcnum/meta/__init__.py,sha256=AVqRgyKXO1orKnE305h88IBvoZ1oz6X11HN1WP5nGvg,60
@@ -31,18 +31,25 @@ dcnum/read/cache.py,sha256=lisrGG7AyvVitf0h92wh5FvYCsxa0pWyGcAyYwGP-LQ,6471
 dcnum/read/const.py,sha256=GG9iyXDtEldvJYOBnhZjlimzIeBMAt4bSr2-xn2gzzc,464
 dcnum/read/hdf5_data.py,sha256=Yyq02UTILc5ZgIQXpR9Y0wuX2WT8s0g23PraI7KxmJY,23489
 dcnum/read/mapped.py,sha256=UryArlrIsHxjOyimBL2Nooi3r73zuGtnGdqdxa6PK_g,3076
-dcnum/segm/__init__.py,sha256=IVP5lv8dTqo25CYLnckHX-4yFsJFraATlWD60KXLL6w,247
+dcnum/segm/__init__.py,sha256=9cLEAd3JWE8IGqDHV-eSDIYOGBfOepd8OcebtNs8Omk,309
 dcnum/segm/segm_thresh.py,sha256=iVhvIhzO0Gw0t3rXOgH71rOI0CNjJJQq4Gg6BulUhK8,948
-dcnum/segm/segmenter.py,sha256=C04cMQmT4K8oZa-CZnYzxXvHZy_UQd3WduF8vQNIgvE,13546
+dcnum/segm/segmenter.py,sha256=FWLFDBR-x_85ku2rObA2F-QBrM4IUaUL-YHChLagVvM,14902
 dcnum/segm/segmenter_manager_thread.py,sha256=frM0sMxC7f7TQiFjmpRxuwG2kUBFpW1inV8dtpADHiI,5924
 dcnum/segm/segmenter_mpo.py,sha256=o6mQlITHgEWvQt9v6oCWwAcZUvxE7MOeLE9DFManzpY,13757
 dcnum/segm/segmenter_sto.py,sha256=e6MtN_RWusA0wTExV-FLGpDXNJs1CbSyXcSdWUPBMvM,3959
+dcnum/segm/segm_torch/__init__.py,sha256=re9jVLYvV1GgC7J5vx2LHKeFYVZPpiwubecAV9f_2kA,670
+dcnum/segm/segm_torch/segm_torch_base.py,sha256=G9AhVyD6LkAmk0tkbYnJUSpvcj3_HYf0uqfILZQsyus,4479
+dcnum/segm/segm_torch/segm_torch_mpo.py,sha256=N01dVXai_4eIGfHJrPjg5C2Bkyq1TOeXeJhw3YbGidw,2504
+dcnum/segm/segm_torch/segm_torch_sto.py,sha256=PTOJrP_FkaxZZul8lM4VA2HL3KyxrheDDWWdJbmJdiw,3393
+dcnum/segm/segm_torch/torch_model.py,sha256=5aL6SwSvg1N2gATEGBhP3aA4WTHlvGzQVYuizmh0LrU,3187
+dcnum/segm/segm_torch/torch_postproc.py,sha256=ctirQTmsZnuZGIxkwFWN9arRneHRYJUxaJ_ZyCgjByM,3311
+dcnum/segm/segm_torch/torch_preproc.py,sha256=Ik_HRxd14pA7FYT5jv-pUkXMWDZrsiGfsEiCsjvSGhU,3762
 dcnum/write/__init__.py,sha256=QvWHeZmjHI18i-YlGYuzN3i7dVWY9UCReKchrJ-gif0,260
 dcnum/write/deque_writer_thread.py,sha256=ao7F1yrVKyufgC4rC0Y2_Vt7snuT6KpI7W2qVxcjdhk,1994
 dcnum/write/queue_collector_thread.py,sha256=d_WfdsZdFnFsiAY0zVMwUlA4juIMeiWYmE_-rezBQCE,11734
 dcnum/write/writer.py,sha256=e6J8YVqhS7kzkpPIMoDMokJpqSy1WWNdOrwaJof1oVc,15601
-dcnum-0.22.0.dist-info/LICENSE,sha256=YRChA1C8A2E-amJbudwMcbTCZy_HzmeY0hMIvduh1MM,1089
-dcnum-0.22.0.dist-info/METADATA,sha256=6Qi51lajhxBvhgWzDNXSfcImZr4MroMlCZ1OLNmBrqw,2194
-dcnum-0.22.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-dcnum-0.22.0.dist-info/top_level.txt,sha256=Hmh38rgG_MFTVDpUDGuO2HWTSq80P585Het4COQzFTg,6
-dcnum-0.22.0.dist-info/RECORD,,
+dcnum-0.23.0.dist-info/LICENSE,sha256=YRChA1C8A2E-amJbudwMcbTCZy_HzmeY0hMIvduh1MM,1089
+dcnum-0.23.0.dist-info/METADATA,sha256=aQzkZcqw9Qh5abdO9ogxYET8cOefG_MngQ8n8AAvRnU,2280
+dcnum-0.23.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+dcnum-0.23.0.dist-info/top_level.txt,sha256=Hmh38rgG_MFTVDpUDGuO2HWTSq80P585Het4COQzFTg,6
+dcnum-0.23.0.dist-info/RECORD,,

{dcnum-0.22.0.dist-info → dcnum-0.23.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{dcnum-0.22.0.dist-info → dcnum-0.23.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{dcnum-0.22.0.dist-info → dcnum-0.23.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

dcnum 0.22.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

Potentially problematic release.

dcnum 0.22.0-py3-none-any.whl → 0.23.0-py3-none-any.whl