dcnum 0.17.2__py3-none-any.whl → 0.19.0__py3-none-any.whl

dcnum/logic/ctrl.py CHANGED
@@ -310,7 +310,17 @@ class DCNumJobRunner(threading.Thread):
  # Whether pipeline hash is invalid.
  ppid.compute_pipeline_hash(**datdict) != dathash
  # Whether the input file is the original output of the pipeline.
- or len(self.draw) != evyield)
+ or len(self.draw) != evyield
+ # If index mapping is defined, then we always redo the pipeline.
+ # If the pipeline hashes are identical and index mapping is not
+ # None, then both pipelines were done with index mapping.
+ # But applying the same pipeline with index mapping in series
+ # will lead to a different result in the second run (e.g. 1st
+ # pipeline run: take every 2nd event; 2nd pipeline run: take
+ # every second event -> results in every 4th event in output of
+ # second pipeline run).
+ or self.draw.index_mapping is not None
+ )
  # Do we have to recompute the background data? In addition to the
  # hash sanity check above, check the generation, input data,
  # and background pipeline identifiers.
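Note: the "every 2nd of every 2nd event is every 4th event" reasoning in the new comment can be checked with plain numpy (illustration only, not dcnum code):

    import numpy as np

    events = np.arange(12)
    first_run = events[::2]      # pipeline run 1: events 0, 2, 4, 6, 8, 10
    second_run = first_run[::2]  # pipeline run 2: events 0, 4, 8 -> every 4th original event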
@@ -382,6 +392,10 @@ class DCNumJobRunner(threading.Thread):
  hw.h5.attrs["pipeline:dcnum gate"] = self.ppdict["gate_id"]
  hw.h5.attrs["pipeline:dcnum hash"] = self.pphash
  hw.h5.attrs["pipeline:dcnum yield"] = self.event_count
+ # index mapping information
+ im = self.job.kwargs["data_kwargs"].get("index_mapping", None)
+ dim = HDF5Data.get_ppid_index_mapping(im)
+ hw.h5.attrs["pipeline:dcnum mapping"] = dim
  # regular metadata
  hw.h5.attrs["experiment:event count"] = self.event_count
  hw.h5.attrs["imaging:pixel size"] = self.draw.pixel_size
@@ -503,7 +517,7 @@ class DCNumJobRunner(threading.Thread):
  num_segmenters = 1
  elif seg_cls.hardware_processor == "cpu": # CPU segmenter
  # We could in principle set the number of slots to one and
- # jave both number of extractors and number of segmenters set
+ # have both number of extractors and number of segmenters set
  # to the total number of CPUs. However, we would need more RAM
  # (for caching the image data) and we also have more overhead.
  # Having two slots shared between all workers is more efficient.
@@ -522,10 +536,11 @@ class DCNumJobRunner(threading.Thread):
  slot_chunks = mp_spawn.Array("i", num_slots)
  slot_states = mp_spawn.Array("u", num_slots)

- # Initialize thread
+ # Initialize segmenter manager thread
  thr_segm = SegmenterManagerThread(
  segmenter=seg_cls(**self.job["segmenter_kwargs"]),
  image_data=imdat,
+ bg_off=self.dtin["bg_off"] if "bg_off" in self.dtin else None,
  slot_states=slot_states,
  slot_chunks=slot_chunks,
  debug=self.job["debug"],
@@ -13,5 +13,7 @@ class ExtendedJSONEncoder(json.JSONEncoder):
  return int(obj)
  elif isinstance(obj, np.bool_):
  return bool(obj)
+ elif isinstance(obj, slice):
+ return "PYTHON-SLICE", (obj.start, obj.stop, obj.step)
  # Let the base class default method raise the TypeError
  return json.JSONEncoder.default(self, obj)
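Note: with the new branch, a slice is encoded as a JSON array tagged with "PYTHON-SLICE". A minimal sketch, assuming `ExtendedJSONEncoder` is imported from its module (the module path is not shown in this diff):

    import json

    payload = {"index_mapping": slice(0, 100, 2)}
    print(json.dumps(payload, cls=ExtendedJSONEncoder))
    # {"index_mapping": ["PYTHON-SLICE", [0, 100, 2]]}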
dcnum/meta/ppid.py CHANGED
@@ -10,7 +10,7 @@ import warnings

  #: Increment this string if there are breaking changes that make
  #: previous pipelines unreproducible.
- DCNUM_PPID_GENERATION = "7"
+ DCNUM_PPID_GENERATION = "8"


  class ClassWithPPIDCapabilities(Protocol):
dcnum/read/__init__.py CHANGED
@@ -2,3 +2,4 @@
  from .cache import md5sum
  from .const import PROTECTED_FEATURES
  from .hdf5_data import HDF5Data, HDF5ImageCache, concatenated_hdf5_data
+ from .mapped import get_mapping_indices, get_mapped_object
dcnum/read/cache.py CHANGED
@@ -1,7 +1,9 @@
+ import abc
  import collections
  import functools
  import hashlib
  import pathlib
+ from typing import Tuple
  import warnings

  import h5py
@@ -13,41 +15,34 @@ class EmptyDatasetWarning(UserWarning):
  pass


- class HDF5ImageCache:
+ class BaseImageChunkCache(abc.ABC):
  def __init__(self,
- h5ds: h5py.Dataset,
+ shape: Tuple[int],
  chunk_size: int = 1000,
  cache_size: int = 2,
- boolean: bool = False):
- """An HDF5 image cache
-
- Deformability cytometry data files commonly contain image stacks
- that are chunked in various ways. Loading just a single image
- can be time-consuming, because an entire HDF5 chunk has to be
- loaded, decompressed and from that one image extracted. The
- `HDF5ImageCache` class caches the chunks from the HDF5 files
- into memory, making single-image-access very fast.
- """
- self.shape = h5ds.shape
+ ):
+ self.shape = shape
+ chunk_size = min(shape[0], chunk_size)
  self._len = self.shape[0]
- if self._len == 0:
- warnings.warn(f"Input image '{h5ds.name}' in "
- f"file {h5ds.file.filename} has zero length",
- EmptyDatasetWarning)
- # TODO:
- # - adjust chunking to multiples of the chunks in the dataset
- # (which might slightly speed up things)
- chunk_size = min(h5ds.shape[0], chunk_size)
- self.h5ds = h5ds
- self.chunk_size = chunk_size
- self.boolean = boolean
- self.cache_size = cache_size
  #: This is a FILO cache for the chunks
  self.cache = collections.OrderedDict()
  self.image_shape = self.shape[1:]
  self.chunk_shape = (chunk_size,) + self.shape[1:]
+ self.chunk_size = chunk_size
+ self.cache_size = cache_size
  self.num_chunks = int(np.ceil(self._len / (self.chunk_size or 1)))

+ def __getitem__(self, index):
+ chunk_index, sub_index = self._get_chunk_index_for_index(index)
+ return self.get_chunk(chunk_index)[sub_index]
+
+ def __len__(self):
+ return self._len
+
+ @abc.abstractmethod
+ def _get_chunk_data(self, chunk_slice):
+ """Implemented in subclass to obtain actual data"""
+
  def _get_chunk_index_for_index(self, index):
  if index < 0:
  index = self._len + index
@@ -59,26 +54,14 @@ class HDF5ImageCache:
  sub_index = index % self.chunk_size
  return chunk_index, sub_index

- def __getitem__(self, index):
- chunk_index, sub_index = self._get_chunk_index_for_index(index)
- return self.get_chunk(chunk_index)[sub_index]
-
- def __len__(self):
- return self._len
-
  def get_chunk(self, chunk_index):
  """Return one chunk of images"""
  if chunk_index not in self.cache:
- fslice = slice(self.chunk_size * chunk_index,
- self.chunk_size * (chunk_index + 1)
- )
- data = self.h5ds[fslice]
- if self.boolean:
- data = np.array(data, dtype=bool)
- self.cache[chunk_index] = data
- if len(self.cache) > self.cache_size:
+ if len(self.cache) >= self.cache_size:
  # Remove the first item
  self.cache.popitem(last=False)
+ data = self._get_chunk_data(self.get_chunk_slice(chunk_index))
+ self.cache[chunk_index] = data
  return self.cache[chunk_index]

  def get_chunk_size(self, chunk_index):
@@ -91,60 +74,77 @@ class HDF5ImageCache:
  raise IndexError(f"{self} only has {self.num_chunks} chunks!")
  return chunk_size

+ def get_chunk_slice(self, chunk_index):
+ """Return the slice corresponding to the chunk index"""
+ ch_slice = slice(self.chunk_size * chunk_index,
+ self.chunk_size * (chunk_index + 1)
+ )
+ return ch_slice
+
  def iter_chunks(self):
- size = self.h5ds.shape[0]
  index = 0
  chunk = 0
  while True:
  yield chunk
  chunk += 1
  index += self.chunk_size
- if index >= size:
+ if index >= self._len:
  break


- class ImageCorrCache:
+ class HDF5ImageCache(BaseImageChunkCache):
  def __init__(self,
- image: HDF5ImageCache,
- image_bg: HDF5ImageCache):
- self.image = image
- self.image_bg = image_bg
- self.chunk_size = image.chunk_size
- self.num_chunks = image.num_chunks
- self.h5ds = image.h5ds
- self.shape = image.shape
- self.chunk_shape = image.chunk_shape
- #: This is a FILO cache for the corrected image chunks
- self.cache = collections.OrderedDict()
- self.cache_size = image.cache_size
+ h5ds: h5py.Dataset,
+ chunk_size: int = 1000,
+ cache_size: int = 2,
+ boolean: bool = False):
+ """An HDF5 image cache

- def _get_chunk_index_for_index(self, index):
- if index < 0:
- index = len(self.h5ds) + index
- chunk_index = index // self.chunk_size
- sub_index = index % self.chunk_size
- return chunk_index, sub_index
+ Deformability cytometry data files commonly contain image stacks
+ that are chunked in various ways. Loading just a single image
+ can be time-consuming, because an entire HDF5 chunk has to be
+ loaded, decompressed and from that one image extracted. The
+ `HDF5ImageCache` class caches the chunks from the HDF5 files
+ into memory, making single-image-access very fast.
+ """
+ super(HDF5ImageCache, self).__init__(
+ shape=h5ds.shape,
+ chunk_size=chunk_size,
+ cache_size=cache_size)
+ # TODO:
+ # - adjust chunking to multiples of the chunks in the dataset
+ # (which might slightly speed up things)
+ self.h5ds = h5ds
+ self.boolean = boolean

- def __getitem__(self, index):
- chunk_index, sub_index = self._get_chunk_index_for_index(index)
- return self.get_chunk(chunk_index)[sub_index]
+ if self._len == 0:
+ warnings.warn(f"Input image '{h5ds.name}' in "
+ f"file {h5ds.file.filename} has zero length",
+ EmptyDatasetWarning)

- def __len__(self):
- return len(self.image)
+ def _get_chunk_data(self, chunk_slice):
+ data = self.h5ds[chunk_slice]
+ if self.boolean:
+ data = np.array(data, dtype=bool)
+ return data

- def get_chunk(self, chunk_index):
- if chunk_index not in self.cache:
- data = np.array(
- self.image.get_chunk(chunk_index), dtype=np.int16) \
- - self.image_bg.get_chunk(chunk_index)
- self.cache[chunk_index] = data
- if len(self.cache) > self.cache_size:
- # Remove the first item
- self.cache.popitem(last=False)
- return self.cache[chunk_index]

- def iter_chunks(self):
- return self.image.iter_chunks()
+ class ImageCorrCache(BaseImageChunkCache):
+ def __init__(self,
+ image: HDF5ImageCache,
+ image_bg: HDF5ImageCache):
+ super(ImageCorrCache, self).__init__(
+ shape=image.shape,
+ chunk_size=image.chunk_size,
+ cache_size=image.cache_size)
+ self.image = image
+ self.image_bg = image_bg
+
+ def _get_chunk_data(self, chunk_slice):
+ data = np.array(
+ self.image._get_chunk_data(chunk_slice), dtype=np.int16) \
+ - self.image_bg._get_chunk_data(chunk_slice)
+ return data


  @functools.cache
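Note: after this refactor, a chunk cache only needs to implement `_get_chunk_data`; slicing, caching, and eviction live in the base class. A minimal sketch (not dcnum code; it assumes `BaseImageChunkCache` is importable from `dcnum.read.cache`):

    import numpy as np
    from dcnum.read.cache import BaseImageChunkCache

    class ArrayChunkCache(BaseImageChunkCache):
        """Serve chunks from an in-memory array (hypothetical example)"""
        def __init__(self, arr, chunk_size=10, cache_size=2):
            super().__init__(shape=arr.shape,
                             chunk_size=chunk_size,
                             cache_size=cache_size)
            self.arr = arr

        def _get_chunk_data(self, chunk_slice):
            # `chunk_slice` is produced by `get_chunk_slice(chunk_index)`
            return self.arr[chunk_slice]

    stack = np.arange(25 * 4 * 4).reshape(25, 4, 4)
    cache = ArrayChunkCache(stack)
    assert cache.num_chunks == 3
    assert np.array_equal(cache[13], stack[13])  # item access goes through the chunk cache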
dcnum/read/const.py CHANGED
@@ -1,6 +1,7 @@
  #: Scalar features that apply to all events in a frame and which are
- #: not computed from image or image_bg data.
+ #: not computed for individual events.
  PROTECTED_FEATURES = [
+ "bg_off",
  "flow_rate",
  "frame",
  "g_force",
@@ -10,5 +11,7 @@ PROTECTED_FEATURES = [
  "time"
  ]

+ # User-defined features may be anything, but if the user needs something
+ # very specific for the pipeline, having them protected is a nice feature.
  for ii in range(10):
  PROTECTED_FEATURES.append(f"userdef{ii}")
dcnum/read/hdf5_data.py CHANGED
@@ -1,7 +1,9 @@
  from __future__ import annotations

+ import hashlib
  import io
  import json
+ import numbers
  import pathlib
  import tempfile
  from typing import Dict, BinaryIO, List
@@ -13,6 +15,7 @@ import numpy as np

  from .cache import HDF5ImageCache, ImageCorrCache, md5sum
  from .const import PROTECTED_FEATURES
+ from .mapped import get_mapped_object, get_mapping_indices


  class HDF5Data:
@@ -26,12 +29,47 @@ class HDF5Data:
  logs: Dict[List[str]] = None,
  tables: Dict[np.ndarray] = None,
  image_cache_size: int = 2,
+ index_mapping: int | slice | List | np.ndarray = None,
  ):
+ """
+
+ Parameters
+ ----------
+ path:
+ path to data file
+ pixel_size:
+ pixel size in µm
+ md5_5m:
+ MD5 sum of the first 5 MiB; computed if not provided
+ meta:
+ metadata dictionary; extracted from HDF5 attributes
+ if not provided
+ basins:
+ list of basin dictionaries; extracted from HDF5 attributes
+ if not provided
+ logs:
+ dictionary of logs; extracted from HDF5 attributes
+ if not provided
+ tables:
+ dictionary of tables; extracted from HDF5 attributes
+ if not provided
+ image_cache_size:
+ size of the image cache to use when accessing image data
+ index_mapping:
+ select only a subset of input events, transparently reducing the
+ size of the dataset, possible data types are
+ - int `N`: use the first `N` events
+ - slice: use the events defined by a slice
+ - list: list of integers specifying the event indices to use
+ Numpy indexing rules apply. E.g. to only process the first
+ 100 events, set this to `100` or `slice(0, 100)`.
+ """
  # Init is in __setstate__ so we can pickle this class
  # and use it for multiprocessing.
  if isinstance(path, h5py.File):
  self.h5 = path
  path = path.filename
+
  self.__setstate__({"path": path,
  "pixel_size": pixel_size,
  "md5_5m": md5_5m,
@@ -40,6 +78,7 @@ class HDF5Data:
  "logs": logs,
  "tables": tables,
  "image_cache_size": image_cache_size,
+ "index_mapping": index_mapping,
  })

  def __contains__(self, item):
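Note: a minimal usage sketch of the new `index_mapping` argument (file path and feature name are hypothetical; the file is assumed to contain at least 100 events):

    import pathlib
    from dcnum.read import HDF5Data

    # only the first 100 events of the file are exposed
    hd = HDF5Data(pathlib.Path("measurement.rtdc"), index_mapping=slice(0, 100))
    print(len(hd))          # 100
    deform = hd["deform"]   # scalar features come back already index-mapped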
@@ -53,7 +92,7 @@ class HDF5Data:

  def __getitem__(self, feat):
  if feat in ["image", "image_bg", "mask"]:
- data = self.get_image_cache(feat)
+ data = self.get_image_cache(feat) # already index-mapped
  if data is None:
  raise KeyError(f"Feature '{feat}' not found in {self}!")
  else:
@@ -62,19 +101,25 @@ class HDF5Data:
  return self._cache_scalar[feat]
  elif (feat in self.h5["events"]
  and len(self.h5["events"][feat].shape) == 1): # cache scalar
- self._cache_scalar[feat] = self.h5["events"][feat][:]
+ if self.index_mapping is None:
+ idx_map = slice(None) # no mapping indices, just slice
+ else:
+ idx_map = get_mapping_indices(self.index_mapping)
+ self._cache_scalar[feat] = self.h5["events"][feat][idx_map]
  return self._cache_scalar[feat]
  else:
  if feat in self.h5["events"]:
  # Not cached (possibly slow)
  warnings.warn(f"Feature {feat} not cached (possibly slow)")
- return self.h5["events"][feat]
+ return get_mapped_object(
+ obj=self.h5["events"][feat],
+ index_mapping=self.index_mapping)
  else:
  # Check the basins
  for idx in range(len(self.basins)):
  bn, bn_features = self.get_basin_data(idx)
  if bn_features and feat in bn_features:
- return bn[feat]
+ return bn[feat] # already index-mapped
  # If we got here, then the feature data does not exist.
  raise KeyError(f"Feature '{feat}' not found in {self}!")

@@ -86,13 +131,14 @@ class HDF5Data:
  "logs": self.logs,
  "tables": self.tables,
  "basins": self.basins,
- "image_cache_size": self.image.cache_size
+ "image_cache_size": self.image.cache_size,
+ "index_mapping": self.index_mapping,
  }

  def __setstate__(self, state):
  # Make sure these properties exist (we rely on __init__, because
  # we want this class to be pickable and __init__ is not called by
- # `pickle.load`.
+ # `pickle.load`).
  # Cached properties
  self._feats = None
  self._keys = None
@@ -116,7 +162,7 @@ class HDF5Data:
  if self.md5_5m is None:
  if isinstance(self.path, pathlib.Path):
  # 5MB md5sum of input file
- self.md5_5m = md5sum(self.path, count=80)
+ self.md5_5m = md5sum(self.path, blocksize=65536, count=80)
  else:
  self.md5_5m = str(uuid.uuid4()).replace("-", "")
  self.meta = state["meta"]
@@ -165,12 +211,17 @@ class HDF5Data:

  self.image_cache_size = state["image_cache_size"]

+ self.index_mapping = state["index_mapping"]
+
  if self.h5 is None:
  self.h5 = h5py.File(self.path, libver="latest")

  def __len__(self):
  if self._len is None:
- self._len = self.h5.attrs["experiment:event count"]
+ if self.index_mapping is not None:
+ self._len = get_mapping_indices(self.index_mapping).size
+ else:
+ self._len = self.h5.attrs["experiment:event count"]
  return self._len

  @property
@@ -244,7 +295,9 @@ class HDF5Data:
  self.h5.close()

  def get_ppid(self):
- return self.get_ppid_from_ppkw({"pixel_size": self.pixel_size})
+ return self.get_ppid_from_ppkw(
+ {"pixel_size": self.pixel_size,
+ "index_mapping": self.index_mapping})

  @classmethod
  def get_ppid_code(cls):
@@ -255,20 +308,60 @@ class HDF5Data:
  # Data does not really fit into the PPID scheme we use for the rest
  # of the pipeline. This implementation here is custom.
  code = cls.get_ppid_code()
- kwid = f"p={kwargs['pixel_size']:.8f}".rstrip("0")
+ # pixel size
+ ppid_ps = f"{kwargs['pixel_size']:.8f}".rstrip("0")
+ # index mapping
+ ppid_im = cls.get_ppid_index_mapping(kwargs.get("index_mapping", None))
+ kwid = "^".join([f"p={ppid_ps}", f"i={ppid_im}"])
  return ":".join([code, kwid])

+ @staticmethod
+ def get_ppid_index_mapping(index_mapping):
+ """Return the pipeline identifier part for index mapping"""
+ im = index_mapping
+ if im is None:
+ dim = "0"
+ elif isinstance(im, numbers.Integral):
+ dim = f"{im}"
+ elif isinstance(im, slice):
+ dim = (f"{im.start if im.start is not None else 'n'}"
+ + f"-{im.stop if im.stop is not None else 'n'}"
+ + f"-{im.step if im.step is not None else 'n'}"
+ )
+ elif isinstance(im, (list, np.ndarray)):
+ idhash = hashlib.md5(
+ np.array(im, dtype=np.uint32).tobytes()).hexdigest()
+ dim = f"h-{idhash[:8]}"
+ else:
+ dim = "unknown"
+ return dim
+
  @staticmethod
  def get_ppkw_from_ppid(dat_ppid):
  # Data does not fit in the PPID scheme we use, but we still
  # would like to pass pixel_size to __init__ if we need it.
- code, pp_dat_kwargs = dat_ppid.split(":")
+ code, kwargs_str = dat_ppid.split(":")
  if code != HDF5Data.get_ppid_code():
  raise ValueError(f"Could not find data method '{code}'!")
- p, val = pp_dat_kwargs.split("=")
- if p != "p":
- raise ValueError(f"Invalid parameter '{p}'!")
- return {"pixel_size": float(val)}
+ kwitems = kwargs_str.split("^")
+ kwargs = {}
+ for item in kwitems:
+ var, val = item.split("=")
+ if var == "p":
+ kwargs["pixel_size"] = float(val)
+ elif var == "i":
+ if val.startswith("h-") or val == "unknown":
+ raise ValueError(f"Cannot invert index mapping {val}")
+ elif val == "0":
+ kwargs["index_mapping"] = None
+ elif val.count("-"):
+ start, stop = [int(v) for v in val.split("-")]
+ kwargs["index_mapping"] = slice(start, stop)
+ else:
+ kwargs["index_mapping"] = int(val)
+ else:
+ raise ValueError(f"Invalid parameter '{var}'!")
+ return kwargs

  def get_basin_data(self, index):
  """Return HDF5Data info for a basin index in `self.basins`
@@ -298,7 +391,7 @@ class HDF5Data:
  if path is None:
  self._basin_data[index] = (None, None)
  else:
- h5dat = HDF5Data(path)
+ h5dat = HDF5Data(path, index_mapping=self.index_mapping)
  features = bn_dict.get("features")
  if features is None:
  # Only get the features from the actual HDF5 file.
@@ -336,7 +429,8 @@ class HDF5Data:

  if ds is not None:
  image = HDF5ImageCache(
- h5ds=ds,
+ h5ds=get_mapped_object(obj=ds,
+ index_mapping=self.index_mapping),
  cache_size=self.image_cache_size,
  boolean=feat == "mask")
  else:
@@ -386,6 +480,7 @@ def concatenated_hdf5_data(paths: List[pathlib.Path],
  - If one of the input files does not contain a feature from the first
  input `paths`, then a `ValueError` is raised. Use the `features`
  argument to specify which features you need instead.
+ - Basins are not considered.
  """
  h5kwargs = {"mode": "w", "libver": "latest"}
  if isinstance(path_out, (pathlib.Path, str)):
dcnum/read/mapped.py ADDED
@@ -0,0 +1,79 @@
+ import functools
+
+ import numbers
+
+ import h5py
+ import numpy as np
+
+
+ class MappedHDF5Dataset:
+ def __init__(self,
+ h5ds: h5py.Dataset,
+ mapping_indices: np.ndarray):
+ """An index-mapped object for accessing an HDF5 dataset
+
+ Parameters
+ ----------
+ h5ds: h5py.Dataset
+ HDF5 dataset from which to map data
+ mapping_indices: np.ndarray
+ numpy indexing array containing integer indices
+ """
+ self.h5ds = h5ds
+ self.mapping_indices = mapping_indices
+ self.shape = (mapping_indices.size,) + h5ds.shape[1:]
+
+ def __getitem__(self, idx):
+ if isinstance(idx, numbers.Integral):
+ return self.h5ds[self.mapping_indices[idx]]
+ else:
+ idx_mapped = self.mapping_indices[idx]
+ return self.h5ds[idx_mapped]
+
+
+ def get_mapping_indices(
+ index_mapping: numbers.Integral | slice | list | np.ndarray
+ ):
+ if isinstance(index_mapping, numbers.Integral):
+ return _get_mapping_indices_cached(index_mapping)
+ elif isinstance(index_mapping, slice):
+ return _get_mapping_indices_cached(
+ (index_mapping.start, index_mapping.stop, index_mapping.step))
+ elif isinstance(index_mapping, (np.ndarray, list)):
+ return np.array(index_mapping, dtype=np.uint32)
+ else:
+ raise ValueError(f"Invalid type for `index_mapping`: "
+ f"{type(index_mapping)} ({index_mapping})")
+
+
+ @functools.lru_cache(maxsize=100)
+ def _get_mapping_indices_cached(
+ index_mapping: numbers.Integral | tuple
+ ):
+ if isinstance(index_mapping, numbers.Integral):
+ return np.arange(index_mapping)
+ elif isinstance(index_mapping, tuple):
+ im_slice = slice(*index_mapping)
+ if im_slice.step is not None:
+ raise NotImplementedError("Slices with step not implemented yet")
+ if im_slice.stop is None or im_slice.start is None:
+ raise NotImplementedError(
+ "Slices must have start and stop defined")
+ return np.arange(im_slice.start, im_slice.stop)
+ elif isinstance(index_mapping, list):
+ return np.array(index_mapping, dtype=np.uint32)
+ else:
+ raise ValueError(f"Invalid type for cached `index_mapping`: "
+ f"{type(index_mapping)} ({index_mapping})")
+
+
+ def get_mapped_object(obj, index_mapping=None):
+ if index_mapping is None:
+ return obj
+ elif isinstance(obj, h5py.Dataset):
+ return MappedHDF5Dataset(
+ obj,
+ mapping_indices=get_mapping_indices(index_mapping))
+ else:
+ raise ValueError(f"No recipe to convert object of type {type(obj)} "
+ f"({obj}) to an index-mapped object")
dcnum/segm/segm_thresh.py CHANGED
@@ -16,7 +16,7 @@ class SegmentThresh(CPUSegmenter):
  Parameters
  ----------
  thresh: int
- grayscale threhold value for creating the mask image;
+ grayscale threshold value for creating the mask image;
  For a background-corrected image, pixels with values below
  this value are considered to be part of the mask.
  """
@@ -25,7 +25,7 @@ class SegmentThresh(CPUSegmenter):
  @staticmethod
  def segment_approach(image, *,
  thresh: float = -6):
- """Mask retrieval as it is done in Shape-In
+ """Mask retrieval using basic thresholding

  Parameters
  ----------
@@ -39,7 +39,7 @@ class SegmentThresh(CPUSegmenter):
  Returns
  -------
  mask: 2d boolean ndarray
- Mask image for the give index
+ Mask image for the given index
  """
  assert thresh < 0, "threshold values above zero not supported!"
  return image < thresh
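Note: the thresholding rule documented above amounts to a single comparison on the background-corrected image. A small sketch with a made-up array and the default `thresh=-6`:

    import numpy as np

    image_corr = np.array([[0, -2, -8],
                           [-1, -7, -3]], dtype=np.int16)
    mask = image_corr < -6  # pixels darker than the threshold form the mask
    # array([[False, False,  True],
    #        [False,  True, False]])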