dcnum 0.22.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dcnum might be problematic. Click here for more details.

dcnum/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.22.0'
16
- __version_tuple__ = version_tuple = (0, 22, 0)
15
+ __version__ = version = '0.23.0'
16
+ __version_tuple__ = version_tuple = (0, 23, 0)
@@ -130,7 +130,7 @@ class Background(abc.ABC):
130
130
  """Return a unique background pipeline identifier
131
131
 
132
132
  The pipeline identifier is universally applicable and must
133
- be backwards-compatible (future versions of dcevent will
133
+ be backwards-compatible (future versions of dcnum will
134
134
  correctly acknowledge the ID).
135
135
 
136
136
  The segmenter pipeline ID is defined as::
@@ -6,6 +6,34 @@ from .common import haralick_names
6
6
 
7
7
  def haralick_texture_features(
8
8
  mask, image=None, image_bg=None, image_corr=None):
9
+ """Compute Haralick texture features
10
+
11
+ The following texture features are excluded
12
+
13
+ - feature 6 "Sum Average", which is equivalent to `2 * bright_bc_avg`
14
+ since dclab 0.44.0
15
+ - feature 10 "Difference Variance", because it has a functional
16
+ dependency on the offset value and since we do background correction,
17
+ we are not interested in it
18
+ - feature 14, because nobody is using it, it is not understood by
19
+ everyone what it actually is, and it is computationally expensive.
20
+
21
+ This leaves us with the following 11 texture features (22 if you count
22
+ avg and ptp):
23
+ https://earlglynn.github.io/RNotes/package/EBImage/Haralick-Textural-Features.html
24
+
25
+ - 1. `tex_asm`: (1) Angular Second Moment
26
+ - 2. `tex_con`: (2) Contrast
27
+ - 3. `tex_cor`: (3) Correlation
28
+ - 4. `tex_var`: (4) Variance
29
+ - 5. `tex_idm`: (5) Inverse Difference Moment
30
+ - 6. `tex_sva`: (7) Sum Variance
31
+ - 7. `tex_sen`: (8) Sum Entropy
32
+ - 8. `tex_ent`: (9) Entropy
33
+ - 9. `tex_den`: (11) Difference Entropy
34
+ - 10. `tex_f12`: (12) Information Measure of Correlation 1
35
+ - 11. `tex_f13`: (13) Information Measure of Correlation 2
36
+ """
9
37
  # make sure we have a boolean array
10
38
  mask = np.array(mask, dtype=bool)
11
39
  size = mask.shape[0]
@@ -22,7 +50,6 @@ def haralick_texture_features(
22
50
 
23
51
  for ii in range(size):
24
52
  # Haralick texture features
25
- # https://gitlab.gwdg.de/blood_data_analysis/dcevent/-/issues/20
26
53
  # Preprocessing:
27
54
  # - create a copy of the array (don't edit `image_corr`)
28
55
  # - add grayscale values (negative values not supported)
dcnum/feat/gate.py CHANGED
@@ -20,7 +20,7 @@ class Gate:
20
20
  Parameters
21
21
  ----------
22
22
  data: .HDF5Data
23
- dcevent data instance
23
+ dcnum data instance
24
24
  online_gates: bool
25
25
  set to True to enable gating with "online" gates stored
26
26
  in the input file; online gates are applied in real-time
@@ -95,7 +95,7 @@ class Gate:
95
95
  """Return a unique gating pipeline identifier
96
96
 
97
97
  The pipeline identifier is universally applicable and must
98
- be backwards-compatible (future versions of dcevent will
98
+ be backwards-compatible (future versions of dcnum will
99
99
  correctly acknowledge the ID).
100
100
 
101
101
  The gating pipeline ID is defined as::
@@ -266,7 +266,7 @@ class QueueEventExtractor:
266
266
  """Return a unique feature extractor pipeline identifier
267
267
 
268
268
  The pipeline identifier is universally applicable and must
269
- be backwards-compatible (future versions of dcevent will
269
+ be backwards-compatible (future versions of dcnum will
270
270
  correctly acknowledge the ID).
271
271
 
272
272
  The feature extractor pipeline ID is defined as::
dcnum/logic/ctrl.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import collections
2
2
  import datetime
3
+ import hashlib
3
4
  import json
4
5
  import logging
5
6
  from logging.handlers import QueueListener
@@ -338,7 +339,8 @@ class DCNumJobRunner(threading.Thread):
338
339
  # hash sanity check above, check the generation, input data,
339
340
  # and background pipeline identifiers.
340
341
  redo_bg = (
341
- (datdict["gen_id"] != self.ppdict["gen_id"])
342
+ "image_bg" not in self.draw
343
+ or (datdict["gen_id"] != self.ppdict["gen_id"])
342
344
  or (datdict["dat_id"] != self.ppdict["dat_id"])
343
345
  or (datdict["bg_id"] != self.ppdict["bg_id"]))
344
346
 
@@ -461,9 +463,17 @@ class DCNumJobRunner(threading.Thread):
461
463
  # This is the identifier appendix that we use to identify this
462
464
  # dataset. Note that we only override the run identifier when
463
465
  # segmentation did actually take place.
464
- mid_ap = "dcn-" + self.pphash[:7]
465
- # This is the current measurement identifier (may be empty).
466
- mid_cur = hw.h5.attrs.get("experiment:run identifier", "")
466
+ mid_ap = f"dcn-{self.pphash[:7]}"
467
+ # This is the current measurement identifier
468
+ mid_cur = hw.h5.attrs.get("experiment:run identifier")
469
+ if not mid_cur:
470
+ # Compute a measurement identifier from the metadata
471
+ m_time = hw.h5.attrs.get("experiment:time", "none")
472
+ m_date = hw.h5.attrs.get("experiment:date", "none")
473
+ m_sid = hw.h5.attrs.get("setup:identifier", "none")
474
+ hasher = hashlib.md5(
475
+ f"{m_time}_{m_date}_{m_sid}".encode("utf-8"))
476
+ mid_cur = str(uuid.UUID(hex=hasher.hexdigest()))
467
477
  # The new measurement identifier is a combination of both.
468
478
  mid_new = f"{mid_cur}_{mid_ap}" if mid_cur else mid_ap
469
479
  hw.h5.attrs["experiment:run identifier"] = mid_new
dcnum/segm/__init__.py CHANGED
@@ -1,6 +1,9 @@
1
1
  # flake8: noqa: F401
2
- from .segmenter import Segmenter, get_available_segmenters
2
+ from .segmenter import (
3
+ Segmenter, SegmenterNotApplicableError, get_available_segmenters
4
+ )
3
5
  from .segmenter_mpo import MPOSegmenter
4
6
  from .segmenter_sto import STOSegmenter
5
7
  from .segmenter_manager_thread import SegmenterManagerThread
6
8
  from . import segm_thresh
9
+ from . import segm_torch
@@ -0,0 +1,19 @@
1
+ import importlib
2
+
3
+ try:
4
+ torch = importlib.import_module("torch")
5
+ req_maj = 2
6
+ req_min = 3
7
+ ver_tuple = torch.__version__.split(".")
8
+ act_maj = int(ver_tuple[0])
9
+ act_min = int(ver_tuple[1])
10
+ if act_maj < req_maj or (act_maj == req_maj and act_min < req_min):
11
+ raise ValueError(f"Your PyTorch version {act_maj}.{act_min} is not "
12
+ f"supported, please update to at least "
13
+ f"{req_maj}.{req_min}")
14
+ except ImportError:
15
+ pass
16
+ else:
17
+ from .segm_torch_mpo import SegmentTorchMPO # noqa: F401
18
+ if torch.cuda.is_available():
19
+ from .segm_torch_sto import SegmentTorchSTO # noqa: F401
@@ -0,0 +1,125 @@
1
+ import functools
2
+ import pathlib
3
+ import re
4
+ from typing import Dict
5
+
6
+ from ...meta import paths
7
+
8
+ from ..segmenter import Segmenter, SegmenterNotApplicableError
9
+
10
+ from .torch_model import load_model
11
+
12
+
13
+ class TorchSegmenterBase(Segmenter):
14
+ """Torch segmenters that use a pretrained model for segmentation"""
15
+ requires_background_correction = False
16
+ mask_postprocessing = True
17
+ mask_default_kwargs = {
18
+ "clear_border": True,
19
+ "fill_holes": True,
20
+ "closing_disk": 0,
21
+ }
22
+
23
+ @classmethod
24
+ def get_ppid_from_ppkw(cls, kwargs, kwargs_mask=None):
25
+ kwargs_new = kwargs.copy()
26
+ # Make sure that the `model_file` kwarg is actually just a filename
27
+ # so that the pipeline identifier only contains the name, but not
28
+ # the full path.
29
+ if "model_file" in kwargs:
30
+ model_file = kwargs["model_file"]
31
+ mpath = pathlib.Path(model_file)
32
+ if mpath.exists():
33
+ # register the location of the file in the search path
34
+ # registry so other threads/processes will find it.
35
+ paths.register_search_path("torch_model_files", mpath.parent)
36
+ kwargs_new["model_file"] = mpath.name
37
+ return super(TorchSegmenterBase, cls).get_ppid_from_ppkw(kwargs_new,
38
+ kwargs_mask)
39
+
40
+ @classmethod
41
+ def validate_applicability(cls,
42
+ segmenter_kwargs: Dict,
43
+ meta: Dict = None,
44
+ logs: Dict = None):
45
+ """Validate the applicability of this segmenter for a dataset
46
+
47
+ The applicability is defined by the metadata in the segmentation
48
+ model.
49
+
50
+ Parameters
51
+ ----------
52
+ segmenter_kwargs: dict
53
+ Keyword arguments for the segmenter
54
+ meta: dict
55
+ Dictionary of metadata from an :class:`HDF5Data` instance
56
+ logs: dict
57
+ Dictionary of logs from an :class:`HDF5Data` instance
58
+
59
+ Returns
60
+ -------
61
+ applicable: bool
62
+ True if the segmenter is applicable to the dataset
63
+
64
+ Raises
65
+ ------
66
+ SegmenterNotApplicableError
67
+ If the segmenter is not applicable to the dataset
68
+ """
69
+ if "model_file" not in segmenter_kwargs:
70
+ raise ValueError("A `model_file` must be provided in the "
71
+ "`segmenter_kwargs` to validate applicability")
72
+
73
+ model_file = segmenter_kwargs["model_file"]
74
+ _, model_meta = load_model(model_file, device="cpu")
75
+
76
+ reasons_list = []
77
+ validators = {
78
+ "meta": functools.partial(
79
+ cls._validate_applicability_item,
80
+ data_dict=meta,
81
+ reasons_list=reasons_list),
82
+ "logs": functools.partial(
83
+ cls._validate_applicability_item,
84
+ # convert logs to strings
85
+ data_dict={key: "\n".join(val) for key, val in logs.items()},
86
+ reasons_list=reasons_list)
87
+ }
88
+ for item in model_meta.get("validation", []):
89
+ it = item["type"]
90
+ if it in validators:
91
+ validators[it](item)
92
+ else:
93
+ reasons_list.append(
94
+ f"invalid validation type {it} in {model_file}")
95
+
96
+ if reasons_list:
97
+ raise SegmenterNotApplicableError(segmenter_class=cls,
98
+ reasons_list=reasons_list)
99
+
100
+ return True
101
+
102
+ @staticmethod
103
+ def _validate_applicability_item(item, data_dict, reasons_list):
104
+ """Populate `reasons_list` with invalid entries
105
+
106
+ Example `data_dict`::
107
+
108
+ {"type": "meta",
109
+ "key": "setup:region",
110
+ "allow-missing-key": False,
111
+ "regexp": "^channel$",
112
+ "regexp-negate": False,
113
+ "reason": "only channel region supported",
114
+ }
115
+ """
116
+ key = item["key"]
117
+ if key in data_dict:
118
+ regexp = re.compile(item["regexp"])
119
+ matched = bool(regexp.match(data_dict[key]))
120
+ negate = item.get("regexp-negate", False)
121
+ valid = matched if not negate else not matched
122
+ if not valid:
123
+ reasons_list.append(item.get("reason", "unknown reason"))
124
+ elif not item.get("allow-missing-key", False):
125
+ reasons_list.append(f"Key '{key}' missing in {item['type']}")
@@ -0,0 +1,71 @@
1
+ import numpy as np
2
+ import torch
3
+
4
+ from ..segmenter_mpo import MPOSegmenter
5
+
6
+ from .segm_torch_base import TorchSegmenterBase
7
+ from .torch_model import load_model
8
+ from .torch_preproc import preprocess_images
9
+ from .torch_postproc import postprocess_masks
10
+
11
+
12
+ class SegmentTorchMPO(TorchSegmenterBase, MPOSegmenter):
13
+ """PyTorch segmentation (multiprocessing version)"""
14
+
15
+ @staticmethod
16
+ def segment_algorithm(image, *,
17
+ model_file: str = None):
18
+ """
19
+ Parameters
20
+ ----------
21
+ image: 2d ndarray
22
+ event image
23
+ model_file: str
24
+ path to or name of a dcnum model file (.dcnm); if only a
25
+ name is provided, then the "torch_model_files" directory
26
+ paths are searched for the file name
27
+
28
+ Returns
29
+ -------
30
+ mask: 2d boolean or integer ndarray
31
+ mask or labeling image for the given index
32
+ """
33
+ if model_file is None:
34
+ raise ValueError("Please specify a .dcnm model file!")
35
+
36
+ # Set number of pytorch threads to 1, because dcnum is doing
37
+ # all the multiprocessing.
38
+ # https://pytorch.org/docs/stable/generated/torch.set_num_threads.html#torch.set_num_threads
39
+ torch.set_num_threads(1)
40
+ device = torch.device("cpu")
41
+
42
+ # Load model and metadata
43
+ model, model_meta = load_model(model_file, device)
44
+
45
+ image_preproc = preprocess_images(image[np.newaxis, :, :],
46
+ **model_meta["preprocessing"])
47
+
48
+ image_ten = torch.from_numpy(image_preproc)
49
+
50
+ # Move image tensors to device
51
+ image_ten_on_device = image_ten.to(device)
52
+ # Model inference
53
+ pred_tensor = model(image_ten_on_device)
54
+
55
+ # Convert the tensor into a numpy mask array. The `pred_tensor`
56
+ # array is still of the shape (1, 1, H, W). The `masks`
57
+ # array is of shape (1, H, W). We can optionally label it
58
+ # here (we have to if the shapes don't match) or do it in
59
+ # postprocessing.
60
+ masks = pred_tensor.detach().cpu().numpy()[0] >= 0.5
61
+
62
+ # Perform postprocessing in cases where the image shapes don't match
63
+ assert len(masks[0].shape) == len(image.shape), "sanity check"
64
+ if masks[0].shape != image.shape:
65
+ labels = postprocess_masks(
66
+ masks=masks,
67
+ original_image_shape=image.shape,
68
+ )
69
+ return labels[0]
70
+ else:
71
+ return masks[0]
@@ -0,0 +1,88 @@
1
+ from dcnum.segm import STOSegmenter
2
+ import numpy as np
3
+ import torch
4
+
5
+ from .segm_torch_base import TorchSegmenterBase
6
+ from .torch_model import load_model
7
+ from .torch_preproc import preprocess_images
8
+ from .torch_postproc import postprocess_masks
9
+
10
+
11
+ class SegmentTorchSTO(TorchSegmenterBase, STOSegmenter):
12
+ """PyTorch segmentation (GPU version)"""
13
+
14
+ @staticmethod
15
+ def _segment_in_batches(imgs_t, model, batch_size, device):
16
+ """Segment image data in batches"""
17
+ size = len(imgs_t)
18
+ # Create empty array to fill up with segmented batches
19
+ masks = np.empty((len(imgs_t), *imgs_t[0].shape[-2:]),
20
+ dtype=bool)
21
+
22
+ for start_idx in range(0, size, batch_size):
23
+ batch = imgs_t[start_idx:start_idx + batch_size]
24
+ # Move image tensors to cuda
25
+ batch = torch.tensor(batch, device=device)
26
+ # Model inference
27
+ batch_seg = model(batch)
28
+ # Remove extra dim [B, C, H, W] --> [B, H, W]
29
+ batch_seg = batch_seg.squeeze(1)
30
+ # Convert cuda-tensor into numpy arrays
31
+ batch_seg_np = batch_seg.detach().cpu().numpy()
32
+ # Fill empty array with segmented batch
33
+ masks[start_idx:start_idx + batch_size] = batch_seg_np >= 0.5
34
+
35
+ return masks
36
+
37
+ @staticmethod
38
+ def segment_algorithm(images, gpu_id=None, batch_size=50, *,
39
+ model_file: str = None):
40
+ """
41
+ Parameters
42
+ ----------
43
+ images: 3d ndarray
44
+ array of N event images of shape (N, H, W)
45
+ gpu_id: str
46
+ optional argument specifying the GPU to use
47
+ batch_size: int
48
+ number of images to process in one batch
49
+ model_file: str
50
+ path to or name of a dcnum model file (.dcnm); if only a
51
+ name is provided, then the "torch_model_files" directory
52
+ paths are searched for the file name
53
+
54
+ Returns
55
+ -------
56
+ mask: 3d boolean or integer ndarray
57
+ mask or label images of shape (N, H, W)
58
+ """
59
+ if model_file is None:
60
+ raise ValueError("Please specify a model file!")
61
+
62
+ # Determine device to use
63
+ device = torch.device(gpu_id if gpu_id is not None else "cuda")
64
+
65
+ # Load model and metadata
66
+ model, model_meta = load_model(model_file, device)
67
+
68
+ # Preprocess the images
69
+ image_preproc = preprocess_images(images,
70
+ **model_meta["preprocessing"])
71
+ # Model inference
72
+ # The `masks` array has the shape (len(images), H, W), where
73
+ # H and W may be different from the corresponding axes in `images`.
74
+ masks = SegmentTorchSTO._segment_in_batches(image_preproc,
75
+ model,
76
+ batch_size,
77
+ device
78
+ )
79
+
80
+ # Perform postprocessing in cases where the image shapes don't match
81
+ assert len(masks.shape[1:]) == len(images.shape[1:]), "sanity check"
82
+ if masks.shape[1:] != images.shape[1:]:
83
+ labels = postprocess_masks(
84
+ masks=masks,
85
+ original_image_shape=images.shape[1:])
86
+ return labels
87
+ else:
88
+ return masks
@@ -0,0 +1,95 @@
1
+ import errno
2
+ import functools
3
+ import hashlib
4
+ import json
5
+ import logging
6
+ import os
7
+ import pathlib
8
+
9
+ import torch
10
+
11
+ from ...meta import paths
12
+
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ def check_md5sum(path):
18
+ """Verify that the last `_`-separated part of the file stem matches the first five characters of the file's MD5 hash"""
19
+ md5 = hashlib.md5(path.read_bytes()).hexdigest()
20
+ if md5[:5] != path.stem.split("_")[-1]:
21
+ raise ValueError(f"MD5 mismatch for {path} ({md5})! Expected the "
22
+ f"input file to end with '{md5[:5]}{path.suffix}'.")
23
+
24
+
25
+ @functools.cache
26
+ def load_model(path_or_name, device):
27
+ """Load a PyTorch model + metadata from a TorchScript jit checkpoint
28
+
29
+ Parameters
30
+ ----------
31
+ path_or_name: str or pathlib.Path
32
+ jit checkpoint file; For dcnum, these files have the suffix .dcnm
33
+ and contain a special `_extra_files["dcnum_meta.json"]` extra
34
+ file that can be loaded via `torch.jit.load` (see below).
35
+ device: str or torch.device
36
+ device on which to run the model
37
+
38
+ Returns
39
+ -------
40
+ model_jit: torch.jit.ScriptModule
41
+ loaded PyTorch model stored as a TorchScript module
42
+ model_meta: dict
43
+ metadata associated with the loaded model
44
+ """
45
+ model_path = retrieve_model_file(path_or_name)
46
+ # define an extra files mapping dictionary that loads the model's metadata
47
+ extra_files = {"dcnum_meta.json": ""}
48
+ # load model
49
+ model_jit = torch.jit.load(model_path,
50
+ _extra_files=extra_files,
51
+ map_location=device)
52
+ # load model metadata
53
+ model_meta = json.loads(extra_files["dcnum_meta.json"])
54
+ # set model to evaluation mode
55
+ model_jit.eval()
56
+ # optimize for inference on device
57
+ model_jit = torch.jit.optimize_for_inference(model_jit)
58
+ return model_jit, model_meta
59
+
60
+
61
+ @functools.cache
62
+ def retrieve_model_file(path_or_name):
63
+ """Retrieve a dcnum torch model file
64
+
65
+ If a path to a model is given, then this path is returned directly.
66
+ If a file name is given, then look for the file with
67
+ :func:`dcnum.meta.paths.find_file` using the "torch_model_files"
68
+ topic.
69
+ """
70
+ # Did the user already pass a path?
71
+ if isinstance(path_or_name, pathlib.Path):
72
+ if path_or_name.exists():
73
+ path = path_or_name
74
+ else:
75
+ try:
76
+ return retrieve_model_file(path_or_name.name)
77
+ except BaseException:
78
+ raise FileNotFoundError(errno.ENOENT,
79
+ os.strerror(errno.ENOENT),
80
+ str(path_or_name))
81
+ elif isinstance(path_or_name, str):
82
+ name = path_or_name.strip()
83
+ # We now have a string for a filename, and we have to figure out what
84
+ # the path is. There are several options, including cached files.
85
+ if pathlib.Path(name).exists():
86
+ path = pathlib.Path(name)
87
+ else:
88
+ path = paths.find_file("torch_model_files", name)
89
+ else:
90
+ raise ValueError(
91
+ f"Please pass a string or a path, got {type(path_or_name)}!")
92
+
93
+ logger.info(f"Found dcnum model file {path}")
94
+ check_md5sum(path)
95
+ return path
@@ -0,0 +1,93 @@
1
+ from typing import Tuple
2
+
3
+ from ..segmenter import Segmenter
4
+
5
+ import numpy as np
6
+ from scipy import ndimage as ndi
7
+
8
+
9
+ def postprocess_masks(masks,
10
+ original_image_shape: Tuple[int, int]):
11
+ """Postprocess mask images from ML segmenters
12
+
13
+ The transformation includes:
14
+ - Revert the cropping and padding operations done in
15
+ :func:`.preprocess_images` by padding with zeros and cropping.
16
+ - If the original image shape is larger than the mask image shape,
17
+ also clear borders in an intermediate step
18
+ (mask postprocessing using :func:`Segmenter.process_mask`).
19
+
20
+ Parameters
21
+ ----------
22
+ masks: 3d or 4d ndarray
23
+ Mask data in shape (batch_size, 1, imagex_size, imagey_size)
24
+ or (batch_size, imagex_size, imagey_size).
25
+ original_image_shape: tuple of (int, int)
26
+ The required output mask shape for one event. This is required for
27
+ doing the inverse of what is done in :func:`.preprocess_images`.
28
+
29
+ Returns
30
+ -------
31
+ labels_proc: np.ndarray
32
+ An integer array with the same dimensions as the original image
33
+ data passed to :func:`.preprocess_images`. The shape of this array
34
+ is (batch_size, original_image_shape[0], original_image_shape[1]).
35
+ """
36
+ # If output of model is 4d, remove channel axis
37
+ if len(masks.shape) == 4:
38
+ masks = masks[:, 0, :, :]
39
+
40
+ # Label the mask image
41
+ labels = np.empty(masks.shape, dtype=np.uint16)
42
+ label_struct = ndi.generate_binary_structure(2, 2)
43
+ for ii in range(masks.shape[0]):
44
+ ndi.label(
45
+ input=masks[ii],
46
+ output=labels[ii],
47
+ structure=label_struct)
48
+
49
+ batch_size = labels.shape[0]
50
+
51
+ # Revert padding and cropping from preprocessing
52
+ mask_shape_ret = labels.shape[1:]
53
+ # height
54
+ s0diff = original_image_shape[0] - mask_shape_ret[0]
55
+ s0t = abs(s0diff) // 2
56
+ s0b = abs(s0diff) - s0t
57
+ # width
58
+ s1diff = original_image_shape[1] - mask_shape_ret[1]
59
+ s1l = abs(s1diff) // 2
60
+ s1r = abs(s1diff) - s1l
61
+
62
+ if s0diff > 0 or s1diff > 0:
63
+ # The masks that we have must be padded. Before we do that, we have
64
+ # to remove events on the edges, otherwise we will have half-segmented
65
+ # cell events in the output array.
66
+ for ii in range(batch_size):
67
+ labels[ii] = Segmenter.process_mask(labels[ii],
68
+ clear_border=True,
69
+ fill_holes=False,
70
+ closing_disk=0)
71
+
72
+ # Crop first, only then pad.
73
+ if s1diff > 0:
74
+ labels_pad = np.zeros((batch_size,
75
+ labels.shape[1],
76
+ original_image_shape[1]),
77
+ dtype=np.uint16)
78
+ labels_pad[:, :, s1l:-s1r] = labels
79
+ labels = labels_pad
80
+ elif s1diff < 0:
81
+ labels = labels[:, :, s1l:-s1r]
82
+
83
+ if s0diff > 0:
84
+ labels_pad = np.zeros((batch_size,
85
+ original_image_shape[0],
86
+ original_image_shape[1]),
87
+ dtype=np.uint16)
88
+ labels_pad[:, s0t:-s0b, :] = labels
89
+ labels = labels_pad
90
+ elif s0diff < 0:
91
+ labels = labels[:, s0t:-s0b, :]
92
+
93
+ return labels
@@ -0,0 +1,109 @@
1
+ from typing import Tuple
2
+
3
+ import numpy as np
4
+
5
+
6
+ def preprocess_images(images: np.ndarray,
7
+ norm_mean: float,
8
+ norm_std: float,
9
+ image_shape: Tuple[int, int] = None,
10
+ ):
11
+ """Transform image data to something torch models expect
12
+
13
+ The transformation includes:
14
+ - normalization (division by 255, subtraction of mean, division by std)
15
+ - cropping and padding of the input images to `image_shape`. For padding,
16
+ the median of each *individual* image is used.
17
+ - casting the input images to four dimensions
18
+ (batch_size, 1, height, width) where the second axis is "channels"
19
+
20
+ Parameters
21
+ ----------
22
+ images:
23
+ Input image array (batch_size, height_in, width_in). If this is a
24
+ 2D image, it will be reshaped to a 3D image with a batch_size of 1.
25
+ norm_mean:
26
+ Mean value used for standard score data normalization, i.e.
27
+ `normalized = (images / 255 - norm_mean) / norm_std`
28
+ norm_std:
29
+ Standard deviation used for standard score data normalization
30
+ (see above)
31
+ image_shape
32
+ Image shape for which the model was created (height, width).
33
+ If the image shape does not match the input image shape, then
34
+ the input images are padded/cropped to fit the image shape of
35
+ the model.
36
+
37
+ Returns
38
+ -------
39
+ image_proc:
40
+ 4D array with preprocessed image data of shape
41
+ (batch_size, 1, height, width)
42
+ """
43
+ if len(images.shape) == 2:
44
+ # Insert indexing axis (batch dimension)
45
+ images = images[np.newaxis, :, :]
46
+
47
+ batch_size = images.shape[0]
48
+
49
+ # crop and pad the images based on what the model expects
50
+ image_shape_act = images.shape[1:]
51
+ if image_shape is None:
52
+ # model fits perfectly to input data
53
+ image_shape = image_shape_act
54
+
55
+ # height
56
+ hdiff = image_shape_act[0] - image_shape[0]
57
+ ht = abs(hdiff) // 2
58
+ hb = abs(hdiff) - ht
59
+ # width
60
+ wdiff = image_shape_act[1] - image_shape[1]
61
+ wl = abs(wdiff) // 2
62
+ wr = abs(wdiff) - wl
63
+ # helper variables
64
+ wpad = wdiff < 0
65
+ wcrp = wdiff > 0
66
+ hpad = hdiff < 0
67
+ hcrp = hdiff > 0
68
+
69
+ # The easy part is the cropping
70
+ if hcrp or wcrp:
71
+ # define slices for width and height
72
+ slice_hc = slice(ht, -hb) if hcrp else slice(None, None)
73
+ slice_wc = slice(wl, -wr) if wcrp else slice(None, None)
74
+ img_proc = images[:, slice_hc, slice_wc]
75
+ else:
76
+ img_proc = images
77
+
78
+ # The hard part is the padding
79
+ if hpad or wpad:
80
+ # compute median for each original input image
81
+ img_med = np.median(images, axis=(1, 2))
82
+ # broadcast the median array from 1D to 3D
83
+ img_med = img_med[:, None, None]
84
+
85
+ # define slices for width and height
86
+ slice_hp = slice(ht, -hb) if hpad else slice(None, None)
87
+ slice_wp = slice(wl, -wr) if wpad else slice(None, None)
88
+
89
+ # empty padding image stack with the shape required for the model
90
+ img_pad = np.empty(shape=(batch_size, image_shape[0], image_shape[1]),
91
+ dtype=np.float32)
92
+ # fill in original data
93
+ img_pad[:, slice_hp, slice_wp] = img_proc
94
+ # fill in background data for height
95
+ if hpad:
96
+ img_pad[:, :ht, :] = img_med
97
+ img_pad[:, -hb:, :] = img_med
98
+ # fill in background data for width
99
+ if wpad:
100
+ img_pad[:, :, :wl] = img_med
101
+ img_pad[:, :, -wr:] = img_med
102
+ # Replace img_proc
103
+ img_proc = img_pad
104
+
105
+ # normalize images
106
+ img_norm = (img_proc.astype(np.float32) / 255 - norm_mean) / norm_std
107
+
108
+ # Add a "channels" axis for the ML models.
109
+ return img_norm[:, np.newaxis, :, :]
dcnum/segm/segmenter.py CHANGED
@@ -13,6 +13,18 @@ from skimage import morphology
13
13
  from ..meta.ppid import kwargs_to_ppid, ppid_to_kwargs
14
14
 
15
15
 
16
+ class SegmenterNotApplicableError(BaseException):
17
+ """Used to indicate when a dataset cannot be segmented with a segmenter"""
18
+ def __init__(self, segmenter_class, reasons_list):
19
+ super(SegmenterNotApplicableError, self).__init__(
20
+ f"The dataset cannot be segmented with the "
21
+ f"'{segmenter_class.get_ppid_code()}' segmenter: "
22
+ f"{', '.join(reasons_list)}"
23
+ )
24
+ self.reasons_list = reasons_list
25
+ self.segmenter_class = segmenter_class
26
+
27
+
16
28
  class Segmenter(abc.ABC):
17
29
  #: Required hardware ("cpu" or "gpu") defined in first-level subclass.
18
30
  hardware_processor = "none"
@@ -88,7 +100,7 @@ class Segmenter(abc.ABC):
88
100
  """Return a unique segmentation pipeline identifier
89
101
 
90
102
  The pipeline identifier is universally applicable and must
91
- be backwards-compatible (future versions of dcevent will
103
+ be backwards-compatible (future versions of dcnum will
92
104
  correctly acknowledge the ID).
93
105
 
94
106
  The segmenter pipeline ID is defined as::
@@ -340,6 +352,34 @@ class Segmenter(abc.ABC):
340
352
  This is implemented in the MPO and STO segmenters.
341
353
  """
342
354
 
355
+ @classmethod
356
+ def validate_applicability(cls,
357
+ segmenter_kwargs: Dict,
358
+ meta: Dict = None,
359
+ logs: Dict = None):
360
+ """Validate the applicability of this segmenter for a dataset
361
+
362
+ Parameters
363
+ ----------
364
+ segmenter_kwargs: dict
365
+ Keyword arguments for the segmenter
366
+ meta: dict
367
+ Dictionary of metadata from an :class:`HDF5Data` instance
368
+ logs: dict
369
+ Dictionary of logs from an :class:`HDF5Data` instance
370
+
371
+ Returns
372
+ -------
373
+ applicable: bool
374
+ True if the segmenter is applicable to the dataset
375
+
376
+ Raises
377
+ ------
378
+ SegmenterNotApplicableError
379
+ If the segmenter is not applicable to the dataset
380
+ """
381
+ return True
382
+
343
383
 
344
384
  @functools.cache
345
385
  def get_available_segmenters():
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcnum
3
- Version: 0.22.0
3
+ Version: 0.23.0
4
4
  Summary: numerics toolbox for imaging deformability cytometry
5
- Author: Maximilian Schlögel, Paul Müller
5
+ Author: Maximilian Schlögel, Paul Müller, Raghava Alajangi
6
6
  Maintainer-email: Paul Müller <dev@craban.de>
7
7
  License: MIT
8
8
  Project-URL: source, https://github.com/DC-Analysis/dcnum
@@ -25,6 +25,8 @@ Requires-Dist: numpy >=1.21
25
25
  Requires-Dist: opencv-python-headless
26
26
  Requires-Dist: scikit-image
27
27
  Requires-Dist: scipy >=1.8.0
28
+ Provides-Extra: torch
29
+ Requires-Dist: torch >=2.3 ; extra == 'torch'
28
30
 
29
31
  |dcnum|
30
32
  =======
@@ -1,11 +1,11 @@
1
1
  dcnum/__init__.py,sha256=hcawIKS7utYiOyVhOAX9t7K3xYzP1b9862VV0b6qSrQ,74
2
- dcnum/_version.py,sha256=U42NllCG9uy3HhEJGou-86_Q3CYkSAexz4DzViMN24w,413
2
+ dcnum/_version.py,sha256=T70sCooCIGWJ8Xde9WLPaxYPJPHtQXVuIM1Xp42tyqE,413
3
3
  dcnum/feat/__init__.py,sha256=jUJYWTD3VIoDNKrmryXbjHb1rGwYtK4b7VPWihYgUoo,325
4
4
  dcnum/feat/event_extractor_manager_thread.py,sha256=5HdCQCywyQ5QC56AMjSqCroqif9oOFyiSFWCe07JojM,7820
5
- dcnum/feat/gate.py,sha256=svbObmqpYdqPawpfrsEjTiUPJXf24GrNi8PXTKT-z44,7225
6
- dcnum/feat/queue_event_extractor.py,sha256=bNdYzMPto37FCIgBbBw-YRQ2TlTpJKCWj9r_Y4sak3E,15700
5
+ dcnum/feat/gate.py,sha256=Yhxq80JoRMmQzBxl35C8NT91c9QcmQa-EIKLuxK6WvE,7221
6
+ dcnum/feat/queue_event_extractor.py,sha256=0ncTQleT1sfc98zYkFuZWxU-akecfTrW6-OOU3z-d8o,15698
7
7
  dcnum/feat/feat_background/__init__.py,sha256=OTmMuazHNaSrZb2XW4cnJ6PlgJLbKrPbaidpEixYa0A,341
8
- dcnum/feat/feat_background/base.py,sha256=phZdyOrHQPjvYlw1JQ8DkdXw5H2-eE1LfLGqCAo1rlo,7965
8
+ dcnum/feat/feat_background/base.py,sha256=A-K3qlJ0ABFBGm5eMKYcNCC7ktFAInSm0eR3N3DHQZY,7963
9
9
  dcnum/feat/feat_background/bg_copy.py,sha256=PK8x4_Uph-_A6uszZC5uhe1gD1dSRdHnDMEsN0HSGHA,1034
10
10
  dcnum/feat/feat_background/bg_roll_median.py,sha256=EyjstMDXFBYuJB1lN6g4Uw7tPm434X3hXQxKSqvcoJ4,13175
11
11
  dcnum/feat/feat_background/bg_sparse_median.py,sha256=ab7Boj7cmr6PBdTbyWTj_yNNJSfuowr7u-iSGW989WI,20709
@@ -18,9 +18,9 @@ dcnum/feat/feat_contour/moments.py,sha256=W8sD2X7JqIBq-9nL82hf4Hm2uJkfca8EvAl_hq
18
18
  dcnum/feat/feat_contour/volume.py,sha256=xVHWtv6USUHJZ5dM1Ur7fI7OwoPT5N2Ps0gKVWylfl8,6639
19
19
  dcnum/feat/feat_texture/__init__.py,sha256=6StM9S540UVtdFFR3bHa7nfCTomeVdoo7Uy9CjuTgH0,137
20
20
  dcnum/feat/feat_texture/common.py,sha256=COXHpXS-7DMouGu3WF83I76L02Sr7P9re4lxajh6g0E,439
21
- dcnum/feat/feat_texture/tex_all.py,sha256=eGjjNfPpfZw7FA_VNFCIMiU38KD0qcGbxLciYy-tCiA,4097
21
+ dcnum/feat/feat_texture/tex_all.py,sha256=_5H3sXYRN0Uq2eUHn3XUyEHkU_tncEqbqJTC-HZcnGY,5198
22
22
  dcnum/logic/__init__.py,sha256=7J3GrwJInNQbrLk61HRIV7X7p69TAIbMYpR34hh6u14,177
23
- dcnum/logic/ctrl.py,sha256=th9xKVqXtmscCteU6Vum3wZb-H2RSyKL5kNpCDEXrlU,34792
23
+ dcnum/logic/ctrl.py,sha256=FyVlizHOIaIGMqINvM-outPywAQU0-5NM7t1dEDml4c,35332
24
24
  dcnum/logic/job.py,sha256=H1uDZ1nnNHNWWCe6mS8OWB0Uxc6XUKLISx5xExeplZY,7015
25
25
  dcnum/logic/json_encoder.py,sha256=cxMnqisbKEVf-rVcw6rK2BBAb6iz_hKFaGl81kK36lQ,571
26
26
  dcnum/meta/__init__.py,sha256=AVqRgyKXO1orKnE305h88IBvoZ1oz6X11HN1WP5nGvg,60
@@ -31,18 +31,25 @@ dcnum/read/cache.py,sha256=lisrGG7AyvVitf0h92wh5FvYCsxa0pWyGcAyYwGP-LQ,6471
31
31
  dcnum/read/const.py,sha256=GG9iyXDtEldvJYOBnhZjlimzIeBMAt4bSr2-xn2gzzc,464
32
32
  dcnum/read/hdf5_data.py,sha256=Yyq02UTILc5ZgIQXpR9Y0wuX2WT8s0g23PraI7KxmJY,23489
33
33
  dcnum/read/mapped.py,sha256=UryArlrIsHxjOyimBL2Nooi3r73zuGtnGdqdxa6PK_g,3076
34
- dcnum/segm/__init__.py,sha256=IVP5lv8dTqo25CYLnckHX-4yFsJFraATlWD60KXLL6w,247
34
+ dcnum/segm/__init__.py,sha256=9cLEAd3JWE8IGqDHV-eSDIYOGBfOepd8OcebtNs8Omk,309
35
35
  dcnum/segm/segm_thresh.py,sha256=iVhvIhzO0Gw0t3rXOgH71rOI0CNjJJQq4Gg6BulUhK8,948
36
- dcnum/segm/segmenter.py,sha256=C04cMQmT4K8oZa-CZnYzxXvHZy_UQd3WduF8vQNIgvE,13546
36
+ dcnum/segm/segmenter.py,sha256=FWLFDBR-x_85ku2rObA2F-QBrM4IUaUL-YHChLagVvM,14902
37
37
  dcnum/segm/segmenter_manager_thread.py,sha256=frM0sMxC7f7TQiFjmpRxuwG2kUBFpW1inV8dtpADHiI,5924
38
38
  dcnum/segm/segmenter_mpo.py,sha256=o6mQlITHgEWvQt9v6oCWwAcZUvxE7MOeLE9DFManzpY,13757
39
39
  dcnum/segm/segmenter_sto.py,sha256=e6MtN_RWusA0wTExV-FLGpDXNJs1CbSyXcSdWUPBMvM,3959
40
+ dcnum/segm/segm_torch/__init__.py,sha256=re9jVLYvV1GgC7J5vx2LHKeFYVZPpiwubecAV9f_2kA,670
41
+ dcnum/segm/segm_torch/segm_torch_base.py,sha256=G9AhVyD6LkAmk0tkbYnJUSpvcj3_HYf0uqfILZQsyus,4479
42
+ dcnum/segm/segm_torch/segm_torch_mpo.py,sha256=N01dVXai_4eIGfHJrPjg5C2Bkyq1TOeXeJhw3YbGidw,2504
43
+ dcnum/segm/segm_torch/segm_torch_sto.py,sha256=PTOJrP_FkaxZZul8lM4VA2HL3KyxrheDDWWdJbmJdiw,3393
44
+ dcnum/segm/segm_torch/torch_model.py,sha256=5aL6SwSvg1N2gATEGBhP3aA4WTHlvGzQVYuizmh0LrU,3187
45
+ dcnum/segm/segm_torch/torch_postproc.py,sha256=ctirQTmsZnuZGIxkwFWN9arRneHRYJUxaJ_ZyCgjByM,3311
46
+ dcnum/segm/segm_torch/torch_preproc.py,sha256=Ik_HRxd14pA7FYT5jv-pUkXMWDZrsiGfsEiCsjvSGhU,3762
40
47
  dcnum/write/__init__.py,sha256=QvWHeZmjHI18i-YlGYuzN3i7dVWY9UCReKchrJ-gif0,260
41
48
  dcnum/write/deque_writer_thread.py,sha256=ao7F1yrVKyufgC4rC0Y2_Vt7snuT6KpI7W2qVxcjdhk,1994
42
49
  dcnum/write/queue_collector_thread.py,sha256=d_WfdsZdFnFsiAY0zVMwUlA4juIMeiWYmE_-rezBQCE,11734
43
50
  dcnum/write/writer.py,sha256=e6J8YVqhS7kzkpPIMoDMokJpqSy1WWNdOrwaJof1oVc,15601
44
- dcnum-0.22.0.dist-info/LICENSE,sha256=YRChA1C8A2E-amJbudwMcbTCZy_HzmeY0hMIvduh1MM,1089
45
- dcnum-0.22.0.dist-info/METADATA,sha256=6Qi51lajhxBvhgWzDNXSfcImZr4MroMlCZ1OLNmBrqw,2194
46
- dcnum-0.22.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
47
- dcnum-0.22.0.dist-info/top_level.txt,sha256=Hmh38rgG_MFTVDpUDGuO2HWTSq80P585Het4COQzFTg,6
48
- dcnum-0.22.0.dist-info/RECORD,,
51
+ dcnum-0.23.0.dist-info/LICENSE,sha256=YRChA1C8A2E-amJbudwMcbTCZy_HzmeY0hMIvduh1MM,1089
52
+ dcnum-0.23.0.dist-info/METADATA,sha256=aQzkZcqw9Qh5abdO9ogxYET8cOefG_MngQ8n8AAvRnU,2280
53
+ dcnum-0.23.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
54
+ dcnum-0.23.0.dist-info/top_level.txt,sha256=Hmh38rgG_MFTVDpUDGuO2HWTSq80P585Het4COQzFTg,6
55
+ dcnum-0.23.0.dist-info/RECORD,,
File without changes