PyPI - dcnum - Versions diffs - 0.13.2__py3-none-any.whl → 0.23.1__py3-none-any.whl - Mend

dcnum 0.13.2__py3-none-any.whl → 0.23.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dcnum might be problematic. Click here for more details.

Files changed (55) hide show

dcnum/_version.py +2 -2
dcnum/feat/__init__.py +2 -1
dcnum/feat/event_extractor_manager_thread.py +67 -33
dcnum/feat/feat_background/__init__.py +3 -12
dcnum/feat/feat_background/base.py +80 -65
dcnum/feat/feat_background/bg_copy.py +31 -0
dcnum/feat/feat_background/bg_roll_median.py +38 -30
dcnum/feat/feat_background/bg_sparse_median.py +96 -45
dcnum/feat/feat_brightness/__init__.py +1 -0
dcnum/feat/feat_brightness/bright_all.py +41 -6
dcnum/feat/feat_contour/__init__.py +4 -0
dcnum/feat/{feat_moments/mt_legacy.py → feat_contour/moments.py} +32 -8
dcnum/feat/feat_contour/volume.py +174 -0
dcnum/feat/feat_texture/__init__.py +1 -0
dcnum/feat/feat_texture/tex_all.py +28 -1
dcnum/feat/gate.py +92 -70
dcnum/feat/queue_event_extractor.py +139 -70
dcnum/logic/__init__.py +5 -0
dcnum/logic/ctrl.py +794 -0
dcnum/logic/job.py +184 -0
dcnum/logic/json_encoder.py +19 -0
dcnum/meta/__init__.py +1 -0
dcnum/meta/paths.py +30 -0
dcnum/meta/ppid.py +66 -9
dcnum/read/__init__.py +1 -0
dcnum/read/cache.py +109 -77
dcnum/read/const.py +6 -4
dcnum/read/hdf5_data.py +190 -31
dcnum/read/mapped.py +87 -0
dcnum/segm/__init__.py +6 -15
dcnum/segm/segm_thresh.py +7 -14
dcnum/segm/segm_torch/__init__.py +19 -0
dcnum/segm/segm_torch/segm_torch_base.py +125 -0
dcnum/segm/segm_torch/segm_torch_mpo.py +71 -0
dcnum/segm/segm_torch/segm_torch_sto.py +88 -0
dcnum/segm/segm_torch/torch_model.py +95 -0
dcnum/segm/segm_torch/torch_postproc.py +93 -0
dcnum/segm/segm_torch/torch_preproc.py +114 -0
dcnum/segm/segmenter.py +245 -96
dcnum/segm/segmenter_manager_thread.py +39 -28
dcnum/segm/{segmenter_cpu.py → segmenter_mpo.py} +137 -43
dcnum/segm/segmenter_sto.py +110 -0
dcnum/write/__init__.py +3 -1
dcnum/write/deque_writer_thread.py +15 -5
dcnum/write/queue_collector_thread.py +14 -17
dcnum/write/writer.py +225 -55
{dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/METADATA +4 -2
dcnum-0.23.1.dist-info/RECORD +55 -0
{dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/WHEEL +1 -1
dcnum/feat/feat_moments/__init__.py +0 -3
dcnum/segm/segmenter_gpu.py +0 -45
dcnum-0.13.2.dist-info/RECORD +0 -40
/dcnum/feat/{feat_moments/ct_opencv.py → feat_contour/contour.py} +0 -0
{dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/LICENSE +0 -0
{dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/top_level.txt +0 -0

dcnum/segm/segm_torch/segm_torch_base.py ADDED Viewed

@@ -0,0 +1,125 @@
+import functools
+import pathlib
+import re
+from typing import Dict
+from ...meta import paths
+from ..segmenter import Segmenter, SegmenterNotApplicableError
+from .torch_model import load_model
+class TorchSegmenterBase(Segmenter):
+    """Torch segmenters that use a pretrained model for segmentation
+    The model is referenced via the `model_file` keyword argument and
+    is loaded with :func:`.torch_model.load_model`.
+    """
+    # Class-level settings; their exact semantics are defined by the
+    # `Segmenter` base class, which is not part of this module.
+    requires_background_correction = False
+    mask_postprocessing = True
+    mask_default_kwargs = {
+        "clear_border": True,
+        "fill_holes": True,
+        "closing_disk": 0,
+    }
+    @classmethod
+    def get_ppid_from_ppkw(cls, kwargs, kwargs_mask=None):
+        """Return the pipeline identifier for the given keyword arguments
+        If `kwargs["model_file"]` points to an existing file, the parent
+        directory of that file is registered as a "torch_model_files"
+        search path and only the file name is forwarded to the parent
+        implementation. The input `kwargs` dictionary is not modified.
+        """
+        kwargs_new = kwargs.copy()
+        # Make sure that the `model_file` kwarg is actually just a filename
+        # so that the pipeline identifier only contains the name, but not
+        # the full path.
+        if "model_file" in kwargs:
+            model_file = kwargs["model_file"]
+            mpath = pathlib.Path(model_file)
+            if mpath.exists():
+                # register the location of the file in the search path
+                # registry so other threads/processes will find it.
+                paths.register_search_path("torch_model_files", mpath.parent)
+                kwargs_new["model_file"] = mpath.name
+        return super(TorchSegmenterBase, cls).get_ppid_from_ppkw(kwargs_new,
+                                                                 kwargs_mask)
+    @classmethod
+    def validate_applicability(cls,
+                               segmenter_kwargs: Dict,
+                               meta: Dict = None,
+                               logs: Dict = None):
+        """Validate the applicability of this segmenter for a dataset
+        The applicability is defined by the "validation" entries in the
+        metadata of the segmentation model.
+        Parameters
+        ----------
+        segmenter_kwargs: dict
+            Keyword arguments for the segmenter; must contain `model_file`
+        meta: dict, optional
+            Dictionary of metadata from an :class:`HDF5Data` instance;
+            `None` is treated like an empty dictionary
+        logs: dict, optional
+            Dictionary of logs from an :class:`HDF5Data` instance (each
+            value is an iterable of text lines); `None` is treated like
+            an empty dictionary
+        Returns
+        -------
+        applicable: bool
+            True if the segmenter is applicable to the dataset
+        Raises
+        ------
+        ValueError
+            If `model_file` is missing in `segmenter_kwargs`
+        SegmenterNotApplicableError
+            If the segmenter is not applicable to the dataset
+        """
+        if "model_file" not in segmenter_kwargs:
+            raise ValueError("A `model_file` must be provided in the "
+                             "`segmenter_kwargs` to validate applicability")
+        # `meta` and `logs` default to None; without this normalization
+        # the defaults would fail with TypeError/AttributeError below.
+        meta = {} if meta is None else meta
+        logs = {} if logs is None else logs
+        model_file = segmenter_kwargs["model_file"]
+        _, model_meta = load_model(model_file, device="cpu")
+        reasons_list = []
+        # One validator per supported validation "type"; all of them
+        # append to the same `reasons_list`.
+        validators = {
+            "meta": functools.partial(
+                cls._validate_applicability_item,
+                data_dict=meta,
+                reasons_list=reasons_list),
+            "logs": functools.partial(
+                cls._validate_applicability_item,
+                # convert logs to strings
+                data_dict={key: "\n".join(val) for key, val in logs.items()},
+                reasons_list=reasons_list)
+        }
+        for item in model_meta.get("validation", []):
+            it = item["type"]
+            if it in validators:
+                validators[it](item)
+            else:
+                reasons_list.append(
+                    f"invalid validation type {it} in {model_file}")
+        if reasons_list:
+            raise SegmenterNotApplicableError(segmenter_class=cls,
+                                              reasons_list=reasons_list)
+        return True
+    @staticmethod
+    def _validate_applicability_item(item, data_dict, reasons_list):
+        """Populate `reasons_list` with invalid entries
+        The value `data_dict[item["key"]]` is matched (from the start of
+        the string) against `item["regexp"]`. A failed match, or a match
+        when "regexp-negate" is set, appends `item["reason"]` to
+        `reasons_list`. A missing key is reported unless
+        "allow-missing-key" is set.
+        Example `item`::
+            {"type": "meta",
+             "key": "setup:region",
+             "allow-missing-key": False,
+             "regexp": "^channel$",
+             "regexp-negate": False,
+             "reason": "only channel region supported",
+             }
+        """
+        key = item["key"]
+        if key in data_dict:
+            regexp = re.compile(item["regexp"])
+            matched = bool(regexp.match(data_dict[key]))
+            negate = item.get("regexp-negate", False)
+            valid = matched if not negate else not matched
+            if not valid:
+                reasons_list.append(item.get("reason", "unknown reason"))
+        elif not item.get("allow-missing-key", False):
+            reasons_list.append(f"Key '{key}' missing in {item['type']}")

dcnum/segm/segm_torch/segm_torch_mpo.py ADDED Viewed

@@ -0,0 +1,71 @@
+import numpy as np
+import torch
+from ..segmenter_mpo import MPOSegmenter
+from .segm_torch_base import TorchSegmenterBase
+from .torch_model import load_model
+from .torch_preproc import preprocess_images
+from .torch_postproc import postprocess_masks
+class SegmentTorchMPO(TorchSegmenterBase, MPOSegmenter):
+    """PyTorch segmentation (multiprocessing version)"""
+    @staticmethod
+    def segment_algorithm(image, *,
+                          model_file: str = None):
+        """Segment one event image with a torch model on the CPU
+        Parameters
+        ----------
+        image: 2d ndarray
+            event image
+        model_file: str
+            path to or name of a dcnum model file (.dcnm); if only a
+            name is provided, then the "torch_model_files" directory
+            paths are searched for the file name
+        Returns
+        -------
+        mask: 2d boolean or integer ndarray
+            mask or labeling image for the given image; a boolean mask
+            is returned when the model output has the shape of `image`,
+            otherwise the output of :func:`postprocess_masks`
+        """
+        if model_file is None:
+            raise ValueError("Please specify a .dcnm model file!")
+        # dcnum takes care of the multiprocessing, so torch itself
+        # must stick to a single thread.
+        # https://pytorch.org/docs/stable/generated/torch.set_num_threads.html#torch.set_num_threads
+        torch.set_num_threads(1)
+        cpu = torch.device("cpu")
+        # Fetch the model together with its metadata
+        model, model_meta = load_model(model_file, cpu)
+        # The preprocessing expects a leading batch axis
+        batch = preprocess_images(image[np.newaxis, :, :],
+                                  **model_meta["preprocessing"])
+        # Run the model on the CPU tensor
+        prediction = model(torch.from_numpy(batch).to(cpu))
+        # The prediction still has the shape (1, 1, H, W); dropping the
+        # first axis and thresholding yields boolean `masks` of shape
+        # (1, H, W).
+        masks = prediction.detach().cpu().numpy()[0] >= 0.5
+        mask = masks[0]
+        assert len(mask.shape) == len(image.shape), "sanity check"
+        if mask.shape == image.shape:
+            # Shapes agree; labeling is left to later postprocessing.
+            return mask
+        # Shapes differ: undo the cropping/padding of the preprocessing,
+        # which also labels the mask.
+        return postprocess_masks(
+            masks=masks,
+            original_image_shape=image.shape,
+        )[0]

dcnum/segm/segm_torch/segm_torch_sto.py ADDED Viewed

@@ -0,0 +1,88 @@
+from dcnum.segm import STOSegmenter
+import numpy as np
+import torch
+from .segm_torch_base import TorchSegmenterBase
+from .torch_model import load_model
+from .torch_preproc import preprocess_images
+from .torch_postproc import postprocess_masks
+class SegmentTorchSTO(TorchSegmenterBase, STOSegmenter):
+    """PyTorch segmentation (GPU version)"""
+    @staticmethod
+    def _segment_in_batches(imgs_t, model, batch_size, device):
+        """Segment image data in batches
+        Parameters
+        ----------
+        imgs_t: ndarray
+            preprocessed image data whose last two axes are (H, W);
+            `segment_algorithm` passes the output of
+            :func:`preprocess_images` here
+        model: callable
+            model that maps a batch tensor to a prediction tensor with
+            a channel axis at position 1
+        batch_size: int
+            number of images per model call; must be at least one
+        device: str or torch.device
+            device on which the batches are placed
+        Returns
+        -------
+        masks: 3d boolean ndarray
+            thresholded (>= 0.5) predictions of shape (N, H, W)
+        Raises
+        ------
+        ValueError
+            If `batch_size` is smaller than one
+        """
+        if batch_size < 1:
+            # A negative step makes the range below empty, which would
+            # return the uninitialized `np.empty` array.
+            raise ValueError(
+                f"`batch_size` must be a positive integer, got {batch_size}!")
+        size = len(imgs_t)
+        # Create empty array to fill up with segmented batches. Use the
+        # shape of the stack (not of the first image) so that an empty
+        # stack yields an empty result instead of an IndexError.
+        masks = np.empty((size, *np.shape(imgs_t)[-2:]),
+                         dtype=bool)
+        for start_idx in range(0, size, batch_size):
+            batch = imgs_t[start_idx:start_idx + batch_size]
+            # Move image tensors to the target device
+            batch = torch.tensor(batch, device=device)
+            # Model inference
+            batch_seg = model(batch)
+            # Remove extra dim [B, C, H, W] --> [B, H, W]
+            batch_seg = batch_seg.squeeze(1)
+            # Convert device tensor into numpy arrays
+            batch_seg_np = batch_seg.detach().cpu().numpy()
+            # Fill empty array with segmented batch
+            masks[start_idx:start_idx + batch_size] = batch_seg_np >= 0.5
+        return masks
+    @staticmethod
+    def segment_algorithm(images, gpu_id=None, batch_size=50, *,
+                          model_file: str = None):
+        """Segment a stack of event images with a torch model
+        Parameters
+        ----------
+        images: 3d ndarray
+            array of N event images of shape (N, H, W)
+        gpu_id: str
+            optional argument specifying the GPU to use; "cuda" is
+            used if this is None
+        batch_size: int
+            number of images to process in one batch
+        model_file: str
+            path to or name of a dcnum model file (.dcnm); if only a
+            name is provided, then the "torch_model_files" directory
+            paths are searched for the file name
+        Returns
+        -------
+        mask: 3d boolean or integer ndarray
+            mask or label images of shape (N, H, W); boolean masks are
+            returned when the model output has the shape of `images`,
+            otherwise the output of :func:`postprocess_masks`
+        Raises
+        ------
+        ValueError
+            If no `model_file` is given or `batch_size` is smaller
+            than one
+        """
+        if model_file is None:
+            raise ValueError("Please specify a model file!")
+        # Determine device to use
+        device = torch.device(gpu_id if gpu_id is not None else "cuda")
+        # Load model and metadata
+        model, model_meta = load_model(model_file, device)
+        # Preprocess the images
+        image_preproc = preprocess_images(images,
+                                          **model_meta["preprocessing"])
+        # Model inference
+        # The `masks` array has the shape (len(images), H, W), where
+        # H and W may be different from the corresponding axes in `images`.
+        masks = SegmentTorchSTO._segment_in_batches(image_preproc,
+                                                    model,
+                                                    batch_size,
+                                                    device
+                                                    )
+        # Perform postprocessing in cases where the image shapes don't match
+        assert len(masks.shape[1:]) == len(images.shape[1:]), "sanity check"
+        if masks.shape[1:] != images.shape[1:]:
+            labels = postprocess_masks(
+                masks=masks,
+                original_image_shape=images.shape[1:])
+            return labels
+        else:
+            return masks

dcnum/segm/segm_torch/torch_model.py ADDED Viewed

@@ -0,0 +1,95 @@
+import errno
+import functools
+import hashlib
+import json
+import logging
+import os
+import pathlib
+import torch
+from ...meta import paths
+logger = logging.getLogger(__name__)
+def check_md5sum(path):
+    """Check the MD5 tag in a file name against the file contents
+    The text after the last underscore of the file stem must equal the
+    first five hex characters of the MD5 sum of the file.
+    Raises
+    ------
+    ValueError
+        If the tag in the file name does not match the MD5 sum
+    """
+    digest = hashlib.md5(path.read_bytes()).hexdigest()
+    actual_tag = digest[:5]
+    expected_tag = path.stem.split("_")[-1]
+    if actual_tag == expected_tag:
+        return
+    raise ValueError(f"MD5 mismatch for {path} ({digest})! Expected the "
+                     f"input file to end with '{actual_tag}{path.suffix}'.")
+@functools.cache
+def load_model(path_or_name, device):
+    """Load a TorchScript model and its dcnum metadata
+    Results are memoized per `(path_or_name, device)` combination via
+    :func:`functools.cache`.
+    Parameters
+    ----------
+    path_or_name: str or pathlib.Path
+        jit checkpoint file; For dcnum, these files have the suffix .dcnm
+        and contain a special `_extra_files["dcnum_meta.json"]` extra
+        file that is read via `torch.jit.load`.
+    device: str or torch.device
+        device on which to run the model
+    Returns
+    -------
+    model_jit: torch.jit.ScriptModule
+        loaded PyTorch model stored as a TorchScript module, set to
+        evaluation mode and optimized for inference
+    model_meta: dict
+        metadata associated with the loaded model
+    """
+    meta_key = "dcnum_meta.json"
+    # `torch.jit.load` stores the content of the requested extra files
+    # in this mapping.
+    extra_files = {meta_key: ""}
+    model_jit = torch.jit.load(retrieve_model_file(path_or_name),
+                               _extra_files=extra_files,
+                               map_location=device)
+    # decode the metadata that was read alongside the model
+    model_meta = json.loads(extra_files[meta_key])
+    # evaluation mode first, then the inference optimization
+    model_jit.eval()
+    return torch.jit.optimize_for_inference(model_jit), model_meta
+@functools.cache
+def retrieve_model_file(path_or_name):
+    """Retrieve a dcnum torch model file
+    If a path to an existing model is given, then this path is returned
+    directly. If a file name is given (or the given path does not
+    exist), then look for the file with
+    :func:`dcnum.meta.paths.find_file` using the "torch_model_files"
+    topic. The MD5 tag in the file name is verified before returning.
+    Parameters
+    ----------
+    path_or_name: str or pathlib.Path
+        path to or file name of the model file
+    Returns
+    -------
+    path: pathlib.Path
+        location of the model file
+    Raises
+    ------
+    FileNotFoundError
+        If a non-existent path was given and the lookup by file name
+        failed; the original error is attached as `__cause__`
+    ValueError
+        If `path_or_name` is neither a string nor a path, or if the
+        MD5 check of the file failed
+    """
+    # Did the user already pass a path?
+    if isinstance(path_or_name, pathlib.Path):
+        if path_or_name.exists():
+            path = path_or_name
+        else:
+            # Fall back to a lookup by file name. Only regular errors
+            # are converted (not KeyboardInterrupt/SystemExit), and the
+            # cause is chained so that e.g. an MD5 mismatch stays
+            # visible in the traceback.
+            try:
+                return retrieve_model_file(path_or_name.name)
+            except Exception as exc:
+                raise FileNotFoundError(errno.ENOENT,
+                                        os.strerror(errno.ENOENT),
+                                        str(path_or_name)) from exc
+    elif isinstance(path_or_name, str):
+        name = path_or_name.strip()
+        # We now have a string for a filename, and we have to figure out what
+        # the path is. There are several options, including cached files.
+        if pathlib.Path(name).exists():
+            path = pathlib.Path(name)
+        else:
+            path = paths.find_file("torch_model_files", name)
+    else:
+        raise ValueError(
+            f"Please pass a string or a path, got {type(path_or_name)}!")
+    logger.info(f"Found dcnum model file {path}")
+    check_md5sum(path)
+    return path

dcnum/segm/segm_torch/torch_postproc.py ADDED Viewed

@@ -0,0 +1,93 @@
+from typing import Tuple
+from ..segmenter import Segmenter
+import numpy as np
+from scipy import ndimage as ndi
+def postprocess_masks(masks,
+                      original_image_shape: Tuple[int, int]):
+    """Label ML segmenter masks and restore the original image shape
+    The transformation includes:
+    - Labeling of each mask image (8-connectivity).
+    - Reverting the cropping and padding operations done in
+      :func:`.preprocess_images` by padding with zeros and cropping.
+    - If the original image shape is larger than the mask image shape
+      along any axis, borders are cleared before padding
+      (mask postprocessing using :func:`Segmenter.process_mask`).
+    Parameters
+    ----------
+    masks: 3d or 4d ndarray
+        Mask data in shape (batch_size, 1, imagex_size, imagey_size)
+        or (batch_size, imagex_size, imagey_size).
+    original_image_shape: tuple of (int, int)
+        The required output mask shape for one event. This is required
+        for doing the inverse of what is done in
+        :func:`.preprocess_images`.
+    Returns
+    -------
+    labels_proc: np.ndarray
+        An integer (uint16) array with the same dimensions as the
+        original image data passed to :func:`.preprocess_images`. The
+        shape of this array is
+        (batch_size, original_image_shape[0], original_image_shape[1]).
+    """
+    # Drop the channel axis of a 4d model output
+    if masks.ndim == 4:
+        masks = masks[:, 0, :, :]
+    num_events = masks.shape[0]
+    # Label every mask image; diagonal neighbors belong together
+    connectivity = ndi.generate_binary_structure(2, 2)
+    labels = np.empty(masks.shape, dtype=np.uint16)
+    for idx in range(num_events):
+        ndi.label(input=masks[idx],
+                  output=labels[idx],
+                  structure=connectivity)
+    # Positive differences mean "pad", negative ones mean "crop"
+    height_diff = original_image_shape[0] - labels.shape[1]
+    width_diff = original_image_shape[1] - labels.shape[2]
+    top = abs(height_diff) // 2
+    bottom = abs(height_diff) - top
+    left = abs(width_diff) // 2
+    right = abs(width_diff) - left
+    # These slices are only used when the respective difference is
+    # non-zero, in which case `bottom` and `right` are at least one.
+    rows = slice(top, -bottom)
+    cols = slice(left, -right)
+    if max(height_diff, width_diff) > 0:
+        # Padding is required. Events touching the edges have to be
+        # removed first, otherwise half-segmented cell events would
+        # end up in the output array.
+        for idx in range(num_events):
+            labels[idx] = Segmenter.process_mask(labels[idx],
+                                                 clear_border=True,
+                                                 fill_holes=False,
+                                                 closing_disk=0)
+    # Width axis first ...
+    if width_diff > 0:
+        padded = np.zeros((num_events,
+                           labels.shape[1],
+                           original_image_shape[1]),
+                          dtype=np.uint16)
+        padded[:, :, cols] = labels
+        labels = padded
+    elif width_diff < 0:
+        labels = labels[:, :, cols]
+    # ... then the height axis
+    if height_diff > 0:
+        padded = np.zeros((num_events,
+                           original_image_shape[0],
+                           original_image_shape[1]),
+                          dtype=np.uint16)
+        padded[:, rows, :] = labels
+        labels = padded
+    elif height_diff < 0:
+        labels = labels[:, rows, :]
+    return labels

dcnum/segm/segm_torch/torch_preproc.py ADDED Viewed

@@ -0,0 +1,114 @@
+from typing import Tuple
+import numpy as np
+def preprocess_images(images: np.ndarray,
+                      norm_mean: float | None,
+                      norm_std: float | None,
+                      image_shape: Tuple[int, int] = None,
+                      ):
+    """Transform image data to something torch models expect
+    The transformation includes:
+    - cropping and padding of the input images to `image_shape`. For padding,
+      the median of each *individual* image is used.
+    - normalization (division by 255, subtraction of mean, division by std)
+    - casting the input images to four dimensions
+      (batch_size, 1, height, width) where the second axis is "channels"
+    Parameters
+    ----------
+    images:
+        Input image array (batch_size, height_in, width_in). If this is a
+        2D image, it will be reshaped to a 3D image with a batch_size of 1.
+    norm_mean:
+        Mean value used for standard score data normalization, i.e.
+        `normalized = (images / 255 - norm_mean) / norm_std`; Set
+        to None to disable normalization.
+    norm_std:
+        Standard deviation used for standard score data normalization;
+        Set to None to disable normalization (see above).
+    image_shape
+        Image shape for which the model was created (height, width).
+        If the image shape does not match the input image shape, then
+        the input images are padded/cropped to fit the image shape of
+        the model. If None, the shape of the input images is used.
+    Returns
+    -------
+    image_proc:
+        4D float32 array with preprocessed image data of shape
+        (batch_size, 1, height, width)
+    """
+    if images.ndim == 2:
+        # A single image gets a leading batch axis
+        images = images[np.newaxis, :, :]
+    num_images = images.shape[0]
+    shape_in = images.shape[1:]
+    # Without a model shape, the model fits perfectly to the input data
+    shape_model = shape_in if image_shape is None else image_shape
+    # Positive differences mean "crop", negative ones mean "pad"
+    delta_h = shape_in[0] - shape_model[0]
+    delta_w = shape_in[1] - shape_model[1]
+    top = abs(delta_h) // 2
+    bottom = abs(delta_h) - top
+    left = abs(delta_w) // 2
+    right = abs(delta_w) - left
+    everything = slice(None, None)
+    # Cropping: cut away the excess symmetrically
+    if delta_h > 0 or delta_w > 0:
+        rows_crop = slice(top, -bottom) if delta_h > 0 else everything
+        cols_crop = slice(left, -right) if delta_w > 0 else everything
+        img_proc = images[:, rows_crop, cols_crop]
+    else:
+        img_proc = images
+    # Padding: embed the data in a canvas filled with the image median
+    if delta_h < 0 or delta_w < 0:
+        # per-image median of the original input, broadcastable to 3D
+        medians = np.median(images, axis=(1, 2))[:, None, None]
+        rows_pad = slice(top, -bottom) if delta_h < 0 else everything
+        cols_pad = slice(left, -right) if delta_w < 0 else everything
+        canvas = np.empty(shape=(num_images, shape_model[0], shape_model[1]),
+                          dtype=np.float32)
+        # background everywhere first, then the image data on top
+        canvas[...] = medians
+        canvas[:, rows_pad, cols_pad] = img_proc
+        # Replace img_proc
+        img_proc = canvas
+    if norm_mean is None or norm_std is None:
+        # no normalization, only convert to float32
+        img_norm = img_proc.astype(np.float32)
+    else:
+        # standard score normalization
+        img_norm = (img_proc.astype(np.float32) / 255 - norm_mean) / norm_std
+    # Add a "channels" axis for the ML models.
+    return img_norm[:, np.newaxis, :, :]

dcnum 0.13.2__py3-none-any.whl → 0.23.1__py3-none-any.whl

Potentially problematic release.

dcnum 0.13.2__py3-none-any.whl → 0.23.1__py3-none-any.whl