dcnum 0.23.1-py3-none-any.whl → 0.25.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

dcnum/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
 
- __version__ = version = '0.23.1'
- __version_tuple__ = version_tuple = (0, 23, 1)
+ __version__ = version = '0.25.1'
+ __version_tuple__ = version_tuple = (0, 25, 1)
dcnum/feat/event_extractor_manager_thread.py CHANGED
@@ -96,12 +96,13 @@ class EventExtractorManagerThread(threading.Thread):
          # If the writer_dq starts filling up, then this could lead to
          # an oom-kill signal. Stall for the writer to prevent this.
          if (ldq := len(self.writer_dq)) > 1000:
-             time.sleep(1)
-             ldq2 = len(self.writer_dq)
-             stall_time = max(0., (ldq2 - 200) / ((ldq - ldq2) or 1))
-             time.sleep(stall_time)
+             stalled_sec = 0.
+             for ii in range(60):
+                 if len(self.writer_dq) > 200:
+                     time.sleep(.5)
+                     stalled_sec += .5
              self.logger.warning(
-                 f"Stalled {stall_time + 1:.1f}s for slow writer "
+                 f"Stalled {stalled_sec:.1f}s due to slow writer "
                  f"({ldq} chunks queued)")
 
          unavailable_slots = 0
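The old code estimated a single stall time from the drain rate; the new code polls the queue up to 60 times, sleeping 0.5 s whenever more than 200 chunks remain queued, which caps the total stall at 30 s. A minimal standalone sketch of this backpressure pattern (the names `high_water`, `low_water`, `max_polls` are illustrative, not dcnum API):

    import collections
    import time

    def stall_for_writer(writer_dq, high_water=1000, low_water=200,
                         max_polls=60, poll_interval=0.5):
        """Sleep in small increments while a slow consumer drains the queue."""
        stalled_sec = 0.0
        if len(writer_dq) > high_water:
            for _ in range(max_polls):
                # Only sleep while the queue is still above the low-water mark
                if len(writer_dq) > low_water:
                    time.sleep(poll_interval)
                    stalled_sec += poll_interval
        return stalled_sec

    # An already-drained queue returns immediately with 0.0
    print(stall_for_writer(collections.deque()))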
dcnum/feat/feat_background/base.py CHANGED
@@ -1,8 +1,10 @@
  import abc
  import functools
  import inspect
+ import logging
  import multiprocessing as mp
  import pathlib
+ import time
 
  import h5py
 
@@ -41,8 +43,11 @@ class Background(abc.ABC):
          kwargs:
              Additional keyword arguments passed to the subclass.
          """
+         self.logger = logging.getLogger(
+             f"dcnum.feat.feat_background.{self.__class__.__name__}")
          # proper conversion to Path objects
          output_path = pathlib.Path(output_path)
+         self.output_path = output_path
          if isinstance(input_data, str):
              input_data = pathlib.Path(input_data)
          # kwargs checks
@@ -188,20 +193,30 @@ class Background(abc.ABC):
          return self.image_proc.value
 
      def process(self):
+         """Perform the background computation
+
+         This irreversibly removes/overrides any "image_bg" and
+         "bg_off" features defined in the output file `self.h5out`.
+         """
+         t0 = time.perf_counter()
          # Delete any old background data
-         for key in ["image_bg", "bg_off"]:
-             if key in self.h5out["events"]:
-                 del self.h5out["events"][key]
+         for ds_key in ["image_bg", "bg_off"]:
+             for grp_key in ["events", "basin_events"]:
+                 if grp_key in self.h5out and ds_key in self.h5out[grp_key]:
+                     del self.h5out[grp_key][ds_key]
          # Perform the actual background computation
          self.process_approach()
          bg_ppid = self.get_ppid()
          # Store pipeline information in the image_bg/bg_off feature
-         for key in ["image_bg", "bg_off"]:
-             if key in self.h5out["events"]:
-                 self.h5out[f"events/{key}"].attrs["dcnum ppid background"] = \
-                     bg_ppid
-                 self.h5out[F"events/{key}"].attrs["dcnum ppid generation"] = \
-                     ppid.DCNUM_PPID_GENERATION
+         for ds_key in ["image_bg", "bg_off"]:
+             for grp_key in ["events", "basin_events"]:
+                 if grp_key in self.h5out and ds_key in self.h5out[grp_key]:
+                     self.h5out[f"{grp_key}/{ds_key}"].attrs[
+                         "dcnum ppid background"] = bg_ppid
+                     self.h5out[F"{grp_key}/{ds_key}"].attrs[
+                         "dcnum ppid generation"] = ppid.DCNUM_PPID_GENERATION
+         self.logger.info(
+             f"Background computation time: {time.perf_counter()-t0:.1f}s")
 
      @abc.abstractmethod
      def process_approach(self):
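`process()` now looks for the background features in both the "events" group and the new "basin_events" group. The membership-check pattern is plain h5py; a self-contained sketch with an in-memory file and made-up shapes (the attribute value is an illustrative placeholder, not a real pipeline identifier):

    import h5py
    import numpy as np

    # In-memory HDF5 file so the sketch leaves no file behind
    with h5py.File("sketch.h5", "w", driver="core", backing_store=False) as h5:
        h5.require_group("events").create_dataset(
            "image_bg", data=np.zeros((5, 80, 320), dtype=np.uint8))
        # Remove stale features from every group that may hold them
        for ds_key in ["image_bg", "bg_off"]:
            for grp_key in ["events", "basin_events"]:
                if grp_key in h5 and ds_key in h5[grp_key]:
                    del h5[grp_key][ds_key]
        # ... recompute, then tag the new dataset with pipeline metadata
        ds = h5["events"].create_dataset(
            "image_bg", data=np.zeros((5, 80, 320), dtype=np.uint8))
        ds.attrs["dcnum ppid background"] = "illustrative-ppid-string"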
dcnum/feat/feat_background/bg_sparse_median.py CHANGED
@@ -1,4 +1,3 @@
- import logging
  import queue
  import time
 
@@ -9,14 +8,13 @@ from ...read import HDF5Data
 
  from .base import mp_spawn, Background
 
- logger = logging.getLogger(__name__)
-
 
  class BackgroundSparseMed(Background):
      def __init__(self, input_data, output_path, kernel_size=200,
                   split_time=1., thresh_cleansing=0, frac_cleansing=.8,
                   offset_correction=True,
-                  compress=True, num_cpus=None):
+                  compress=True,
+                  num_cpus=None):
          """Sparse median background correction with cleansing
 
          In contrast to the rolling median background correction,
@@ -61,7 +59,7 @@ class BackgroundSparseMed(Background):
          offset_correction: bool
              The sparse median background correction produces one median
              image for multiple input frames (BTW this also leads to very
-             efficient data storage with HDF5 data compression filters). In
+             efficient data storage with internal HDF5 basins). In
              case the input frames are subject to frame-by-frame brightness
              variations (e.g. flickering of the illumination source), it
              is useful to have an offset value per frame that can then be
@@ -79,6 +77,11 @@ class BackgroundSparseMed(Background):
          num_cpus: int
              Number of CPUs to use for median computation. Defaults to
              `multiprocessing.cpu_count()`.
+
+         .. versionchanged:: 0.23.5
+
+             The background image data are stored as an internal
+             mapped basin to reduce the output file size.
          """
          super(BackgroundSparseMed, self).__init__(
              input_data=input_data,
@@ -93,7 +96,7 @@ class BackgroundSparseMed(Background):
          )
 
          if kernel_size > len(self.input_data):
-             logger.warning(
+             self.logger.warning(
                  f"The kernel size {kernel_size} is too large for input data"
                  f"size {len(self.input_data)}. Setting it to input data size!")
              kernel_size = len(self.input_data)
@@ -126,13 +129,14 @@ class BackgroundSparseMed(Background):
          else:
              # compute time using frame rate (approximate)
              dur = self.image_count / fr * 1.5
-             logger.info(f"Approximating duration: {dur/60:.1f}min")
+             self.logger.info(
+                 f"Approximating duration: {dur/60:.1f}min")
              self.time = np.linspace(0, dur, self.image_count,
                                      endpoint=True)
          if self.time is None:
              # No HDF5 file or no information therein; Make an educated guess.
              dur = self.image_count / 3600 * 1.5
-             logger.info(f"Guessing duration: {dur/60:.1f}min")
+             self.logger.info(f"Guessing duration: {dur/60:.1f}min")
              self.time = np.linspace(0, dur, self.image_count,
                                      endpoint=True)
 
@@ -222,7 +226,7 @@ class BackgroundSparseMed(Background):
          offset_correction: bool
              The sparse median background correction produces one median
              image for multiple input frames (BTW this also leads to very
-             efficient data storage with HDF5 data compression filters). In
+             efficient data storage with internal HDF5 basins). In
              case the input frames are subject to frame-by-frame brightness
              variations (e.g. flickering of the illumination source), it
              is useful to have an offset value per frame that can then be
@@ -301,18 +305,18 @@ class BackgroundSparseMed(Background):
              thresh = np.quantile(ref, self.frac_cleansing)
              used = ref <= thresh
              frac_remove = np.sum(~used) / used.size
-             logger.warning(
+             self.logger.warning(
                  f"{frac_remove_user:.1%} of the background images would "
                  f"be removed with the current settings, so we enforce "
                  f"`frac_cleansing`. To avoid this warning, try decreasing "
                  f"`thresh_cleansing` or `frac_cleansing`. The new "
                  f"threshold is {thresh_fact / thresh}.")
 
-         logger.info(f"Cleansed {frac_remove:.2%}")
+         self.logger.info(f"Cleansed {frac_remove:.2%}")
          step_times = self.step_times[used]
          bg_images = self.bg_images[used]
      else:
-         logger.info("Background series cleansing disabled")
+         self.logger.info("Background series cleansing disabled")
          step_times = self.step_times
          bg_images = self.bg_images
 
@@ -322,35 +326,55 @@ class BackgroundSparseMed(Background):
          idx1 = None
          for ii in range(len(step_times)):
              t1 = step_times[ii]
-             idx1 = np.argmin(np.abs(self.time - t1 + self.split_time/2))
+             idx1 = np.argmin(np.abs(self.time - t1 - self.split_time/2))
              bg_idx[idx0:idx1] = ii
              idx0 = idx1
          if idx1 is not None:
              # Fill up remainder of index array with last entry
              bg_idx[idx1:] = ii
 
-         self.image_proc.value = 1
-
-         # Write background data
-         pos = 0
-         step = 1000
-         while pos < self.image_count:
-             stop = min(pos + step, self.image_count)
-             cur_slice = slice(pos, stop)
-             cur_bg_data = bg_images[bg_idx[cur_slice]]
-             self.writer.store_feature_chunk("image_bg", cur_bg_data)
-             if self.offset_correction:
+         # Store the background images as an internal mapped basin
+         self.writer.store_basin(
+             name="background images",
+             description=f"Pipeline identifier: {self.get_ppid()}",
+             mapping=bg_idx,
+             internal_data={"image_bg": bg_images}
+         )
+
+         # store the offset correction, if applicable
+         if self.offset_correction:
+             self.logger.info("Computing offset correction")
+             # compute the mean at the top of all background images
+             sh, sw = self.input_data.shape[1:]
+             roi_full = (slice(None), slice(0, 20), slice(0, sw))
+             bg_data_mean = np.mean(bg_images[roi_full], axis=(1, 2))
+             pos = 0
+             step = self.writer.get_best_nd_chunks(item_shape=(sh, sw),
+                                                   feat_dtype=np.uint8)[0]
+             bg_off = np.zeros(self.image_count, dtype=float)
+             # For every chunk in the input image data, compute that
+             # value as well and store the resulting offset value.
+             # TODO: Could this be parallelized, or are we limited in reading?
+             while pos < self.image_count:
+                 stop = min(pos + step, self.image_count)
                  # Record background offset correction "bg_off". We take a
                  # slice of 20px from the top of the image (there are normally
                  # no events here, only the channel walls are visible).
-                 sh, sw = self.input_data.shape[1:]
-                 roi_full = (slice(None), slice(0, 20), slice(0, sw))
+                 cur_slice = slice(pos, stop)
+                 # mean background brightness
+                 val_bg = bg_data_mean[bg_idx[cur_slice]]
+                 # mean image brightness
                  roi_cur = (cur_slice, slice(0, 20), slice(0, sw))
-                 val_bg = np.mean(cur_bg_data[roi_full], axis=(1, 2))
                  val_dat = np.mean(self.input_data[roi_cur], axis=(1, 2))
                  # background image = image_bg + bg_off
-                 self.writer.store_feature_chunk("bg_off", val_dat - val_bg)
-                 pos += step
+                 bg_off[cur_slice] = val_dat - val_bg
+                 # set progress
+                 self.image_proc.value = 0.5 * (1 + pos / self.image_count)
+                 pos = stop
+             # finally, store the background offset feature
+             self.writer.store_feature_chunk("bg_off", bg_off)
+
+         self.image_proc.value = 1
 
      def process_second(self,
                         ii: int,
@@ -393,7 +417,9 @@ class BackgroundSparseMed(Background):
 
          self.bg_images[ii] = self.shared_output.reshape(self.image_shape)
 
-         self.image_proc.value = idx_stop / self.image_count
+         self.image_proc.value = idx_stop / (
+             # with offset correction, everything is slower
+             self.image_count * (1 + self.offset_correction))
 
 
  class WorkerSparseMed(mp_spawn.Process):
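The core saving of the internal mapped basin: instead of one background image per frame, only one image per median window is stored, together with a per-frame index array (the `mapping` passed to `store_basin`). A toy illustration with made-up shapes:

    import numpy as np

    # Suppose the sparse median produced 3 background images for 10 frames.
    bg_images = np.random.randint(0, 255, (3, 80, 320), dtype=np.uint8)
    bg_idx = np.array([0, 0, 0, 0, 1, 1, 1, 2, 2, 2])  # frame -> median image

    # Reading frame 7's background is a single small lookup:
    image_bg_frame_7 = bg_images[bg_idx[7]]

    # Expanding everything (what a reader would do lazily) is fancy indexing:
    image_bg_all = bg_images[bg_idx]
    assert image_bg_all.shape == (10, 80, 320)

Only the (3, 80, 320) stack and the 10-entry index array hit the disk, instead of a full (10, 80, 320) per-frame stack.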
dcnum/logic/ctrl.py CHANGED
@@ -1,6 +1,7 @@
  import collections
  import datetime
  import hashlib
+ import importlib
  import json
  import logging
  from logging.handlers import QueueListener
@@ -33,6 +34,7 @@ from ..write import (
  from .job import DCNumPipelineJob
  from .json_encoder import ExtendedJSONEncoder
 
+
  # Force using "spawn" method for multiprocessing, because we are using
  # queues and threads and would end up with race conditions otherwise.
  mp_spawn = mp.get_context("spawn")
@@ -401,6 +403,12 @@ class DCNumJobRunner(threading.Thread):
                        features=orig_feats,
                        mapping=None)
 
+         # Handle basin data according to the user's request
+         self.state = "plumbing"
+         self.task_enforce_basin_strategy()
+
+         self.state = "cleanup"
+
          with HDF5Writer(self.path_temp_out) as hw:
              # pipeline metadata
              hw.h5.attrs["pipeline:dcnum generation"] = self.ppdict["gen_id"]
@@ -430,6 +438,16 @@ class DCNumJobRunner(threading.Thread):
                  "build": ", ".join(platform.python_build()),
                  "implementation":
                      platform.python_implementation(),
+                 "libraries": get_library_versions_dict([
+                     "cv2",
+                     "h5py",
+                     "mahotas",
+                     "numba",
+                     "numpy",
+                     "scipy",
+                     "skimage",
+                     "torch",
+                 ]),
                  "version": platform.python_version(),
              },
              "system": {
@@ -450,11 +468,7 @@ class DCNumJobRunner(threading.Thread):
 
          # copy metadata/logs/tables from original file
          with h5py.File(self.job["path_in"]) as h5_src:
-             copy_metadata(h5_src=h5_src,
-                           h5_dst=hw.h5,
-                           # Don't copy basins, we would have to index-map
-                           # them first.
-                           copy_basins=False)
+             copy_metadata(h5_src=h5_src, h5_dst=hw.h5)
          if redo_seg:
              # Store the correct measurement identifier. This is used to
              # identify this file as a correct basin in subsequent pipeline
@@ -478,12 +492,6 @@ class DCNumJobRunner(threading.Thread):
              mid_new = f"{mid_cur}_{mid_ap}" if mid_cur else mid_ap
              hw.h5.attrs["experiment:run identifier"] = mid_new
 
-         # Handle basin data according to the user's request
-         self.state = "plumbing"
-         self.task_enforce_basin_strategy()
-
-         self.state = "cleanup"
-
          trun = datetime.timedelta(seconds=round(time.monotonic() - time_start))
          self.logger.info(f"Run duration: {str(trun)}")
          self.logger.info(time.strftime("Run stop: %Y-%m-%d-%H.%M.%S",
@@ -535,24 +543,19 @@ class DCNumJobRunner(threading.Thread):
          """
          self._progress_bn = 0
          t0 = time.perf_counter()
-         # We need to make sure that the features are correctly attributed
-         # from the input files. E.g. if the input file already has
-         # background images, but we recompute the background images, then
-         # we have to use the data from the recomputed background file.
-         # We achieve this by keeping a specific order and only copying those
-         # features that we don't already have in the output file.
-         feats_raw = [
-             # 1. background data from the temporary input image
-             #    (this must come before draw [sic!])
-             [self.dtin.h5, ["image_bg", "bg_off"], "critical"],
-             # 2. frame-based scalar features from the raw input file
-             #    (e.g. "temp" or "frame")
-             [self.draw.h5, self.draw.features_scalar_frame, "optional"],
-             # 3. image features from the input file
-             [self.draw.h5, ["image", "image_bg", "bg_off"], "optional"],
-         ]
-         with h5py.File(self.path_temp_out, "a") as hout:
-             hw = HDF5Writer(hout)
+         # We have these points to consider:
+         # - We must use the `basinmap` feature to map from the original
+         #   file to the output file.
+         # - We must copy "bg_off" and "image_bg" to the output file.
+         # - For the "drain" basin strategy, we also have to copy all the
+         #   other features.
+         # - If "image_bg" is defined as an internal basin in the input
+         #   file, we have to convert the mapping and store a corresponding
+         #   internal basin in the output file.
+
+         # Determine the basinmap feature
+         with HDF5Writer(self.path_temp_out) as hw:
+             hout = hw.h5
              # First, we have to determine the basin mapping from input to
              # output. This information is stored by the QueueCollectorThread
              # in the "basinmap0" feature, ready to be used by us.
@@ -565,21 +568,22 @@ class DCNumJobRunner(threading.Thread):
              # mapping of the input file was set to slice(1, 100), then the
              # first image would not be there, and we would have
              # [1, 1, 1, ...].
-             idx_um = hout["events/index_unmapped"]
+             idx_um = hout["events/index_unmapped"][:]
 
              # If we want to convert this to an actual basinmap feature,
              # then we have to convert those indices to indices that map
              # to the original input HDF5 file.
              raw_im = self.draw.index_mapping
              if raw_im is None:
-                 self.logger.info("Input file mapped with basinmap0")
                  # Create a hard link to save time and space
                  hout["events/basinmap0"] = hout["events/index_unmapped"]
-                 basinmap = idx_um
+                 basinmap0 = idx_um
              else:
-                 basinmap = get_mapping_indices(raw_im)[idx_um]
+                 self.logger.info("Converting input mapping")
+                 basinmap0 = get_mapping_indices(raw_im)[idx_um]
              # Store the mapped basin data in the output file.
-             hw.store_feature_chunk("basinmap0", basinmap)
+             hw.store_feature_chunk("basinmap0", basinmap0)
+             self.logger.info("Input mapped to output with basinmap0")
              # We don't need them anymore.
              del hout["events/index_unmapped"]
 
@@ -587,19 +591,72 @@ class DCNumJobRunner(threading.Thread):
              # is the size of the raw dataset and the latter is its mapped
              # size!
              size_raw = self.draw.h5.attrs["experiment:event count"]
-             if (len(basinmap) == size_raw
-                     and np.all(basinmap == np.arange(size_raw))):
+             if (len(basinmap0) == size_raw
+                     and np.all(basinmap0 == np.arange(size_raw))):
                  # This means that the images in the input overlap perfectly
                  # with the images in the output, i.e. a "copy" segmenter
                  # was used or something is very reproducible.
                  # We set basinmap to None to be more efficient.
-                 basinmap = None
+                 basinmap0 = None
 
          else:
              # The input is identical to the output, because we are using
              # the same pipeline identifier.
-             basinmap = None
-
+             basinmap0 = None
+
+         # List of features we have to copy from input to output.
+         # We need to make sure that the features are correctly attributed
+         # from the input files. E.g. if the input file already has
+         # background images, but we recompute the background images, then
+         # we have to use the data from the recomputed background file.
+         # We achieve this by keeping a specific order and only copying
+         # those features that we don't already have in the output file.
+         feats_raw = [
+             # background data from the temporary input image
+             [self.dtin.h5, ["bg_off"], "critical"],
+             [self.draw.h5, self.draw.features_scalar_frame, "optional"],
+             [self.draw.h5, ["image", "bg_off"], "optional"],
+         ]
+
+         # Store image_bg as an internal basin, if defined in input
+         for idx in range(len(self.dtin.basins)):
+             bn_dict = self.dtin.basins[idx]
+             if (bn_dict["type"] == "internal"
+                     and "image_bg" in bn_dict["features"]):
+                 self.logger.info(
+                     "Copying internal basin background images")
+                 bn_grp, bn_feats, bn_map = self.dtin.get_basin_data(idx)
+                 assert "image_bg" in bn_feats
+                 # Load all images into memory (should only be ~600)
+                 bg_images1 = self.dtin.h5["basin_events"]["image_bg"][:]
+                 # Get the original internal mapping for these images.
+                 # Note that `basinmap0` always refers to indices in the
+                 # original raw input file, and not to indices in an
+                 # optional mapped input file (using `index_mapping`).
+                 # Therefore, we do `self.dtin.h5["events"]["basinmap0"]`
+                 # instead of `self.dtin["basinmap0"]`.
+                 basinmap_in = self.dtin.h5["events"][bn_dict["mapping"]][:]
+                 # Now we have to convert the indices in `basinmap_in`
+                 # to indices in the output file.
+                 basinmap1 = basinmap_in[basinmap0]
+                 # Store the internal mapping in the output file
+                 hw.store_basin(name=bn_dict["name"],
+                                description=bn_dict["description"],
+                                mapping=basinmap1,
+                                internal_data={"image_bg": bg_images1}
+                                )
+                 break
+         else:
+             self.logger.info("Background images must be copied")
+             # There is no internal image_bg feature, probably because
+             # the user did not use the sparsemed background correction.
+             # In this case, we simply add "image_bg" to the `feats_raw`.
+             feats_raw += [
+                 [self.dtin.h5, ["image_bg"], "critical"],
+                 [self.draw.h5, ["image_bg"], "optional"],
+             ]
+
+         # Copy the features required in the output file.
          for hin, feats, importance in feats_raw:
              # Only consider features that are available in the input
              # and that are not already in the output.
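The key step above is composing two index mappings with NumPy fancy indexing: `basinmap_in` maps raw input frames to internal basin images, `basinmap0` maps output events to raw input frames, so `basinmap_in[basinmap0]` maps output events directly to basin images. With made-up toy arrays:

    import numpy as np

    # output event -> index in the raw input file
    basinmap0 = np.array([0, 0, 2, 3])
    # raw input frame -> index into "basin_events/image_bg"
    basinmap_in = np.array([0, 0, 1, 1, 2])
    # composition: output event -> internal basin image
    basinmap1 = basinmap_in[basinmap0]
    print(basinmap1)  # [0 0 1 1]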
@@ -614,7 +671,7 @@ class DCNumJobRunner(threading.Thread):
                  copy_features(h5_src=hin,
                                h5_dst=hout,
                                features=feats,
-                               mapping=basinmap)
+                               mapping=basinmap0)
              else:
                  # TAP: Create basins for the "optional" features in the
                  # output file. Note that the "critical" features never
@@ -622,11 +679,17 @@ class DCNumJobRunner(threading.Thread):
                  self.logger.debug(f"Creating basin for {feats}")
                  # Relative and absolute paths.
                  pin = pathlib.Path(hin.filename).resolve()
+                 paths = [pin]
                  pout = pathlib.Path(hout.filename).resolve().parent
-                 paths = [pin, os.path.relpath(pin, pout)]
+                 try:
+                     paths.append(os.path.relpath(pin, pout))
+                 except ValueError:
+                     # This means it is impossible to compute a relative
+                     # path (e.g. different drive letter on Windows).
+                     pass
                  hw.store_basin(name="dcnum basin",
                                 features=feats,
-                                mapping=basinmap,
+                                mapping=basinmap0,
                                 paths=paths,
                                 description=f"Created with dcnum {version}",
                                 )
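`os.path.relpath` raises `ValueError` when no relative path exists, which on Windows happens for paths on different drives; the `try/except` above then stores only the absolute path. For example:

    import os

    print(os.path.relpath("/data/in.rtdc", "/results"))  # ../data/in.rtdc
    # On Windows, different drive letters have no relative representation:
    # os.path.relpath("C:/data/in.rtdc", "D:/results")  -> ValueError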
@@ -719,7 +782,6 @@ class DCNumJobRunner(threading.Thread):
 
          # Start the data collection thread
          thr_coll = QueueCollectorThread(
-             data=self.dtin,
              event_queue=fe_kwargs["event_queue"],
              writer_dq=writer_dq,
              feat_nevents=fe_kwargs["feat_nevents"],
@@ -780,6 +842,19 @@ class DCNumJobRunner(threading.Thread):
          self.logger.info("Finished segmentation and feature extraction")
 
 
+ def get_library_versions_dict(library_name_list):
+     version_dict = {}
+     for library_name in library_name_list:
+         try:
+             lib = importlib.import_module(library_name)
+         except BaseException:
+             version = None
+         else:
+             version = lib.__version__
+         version_dict[library_name] = version
+     return version_dict
+
+
  def join_thread_helper(thr, timeout, retries, logger, name):
      for _ in range(retries):
          thr.join(timeout=timeout)
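The new helper swallows any import failure (`BaseException` also catches e.g. a broken optional dependency that raises at import time) and records `None` instead. Hypothetical usage; the version string shown is illustrative:

    >>> get_library_versions_dict(["numpy", "no_such_library"])
    {'numpy': '1.26.4', 'no_such_library': None}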
dcnum/logic/job.py CHANGED
@@ -182,3 +182,25 @@ class DCNumPipelineJob:
          if len(ret) == 1:
              ret = ret[0]
          return ret
+
+     def validate(self):
+         """Make sure the pipeline will run given the job kwargs
+
+         Returns
+         -------
+         True:
+             for testing convenience
+
+         Raises
+         ------
+         dcnum.segm.SegmenterNotApplicableError:
+             the segmenter is incompatible with the input path
+         """
+         # Check segmenter applicability
+         seg_cls = get_available_segmenters()[self.kwargs["segmenter_code"]]
+         with HDF5Data(self.kwargs["path_in"]) as hd:
+             seg_cls.validate_applicability(
+                 segmenter_kwargs=self.kwargs["segmenter_kwargs"],
+                 logs=hd.logs,
+                 meta=hd.meta)
+         return True
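A hypothetical usage sketch, assuming `DCNumPipelineJob` accepts the `path_in` and `segmenter_code` keyword arguments that `validate` reads from `self.kwargs` (the file name is made up):

    from dcnum.logic import DCNumPipelineJob
    from dcnum.segm import SegmenterNotApplicableError

    job = DCNumPipelineJob(path_in="measurement.rtdc",
                           segmenter_code="thresh")
    try:
        job.validate()  # returns True if the segmenter applies
    except SegmenterNotApplicableError as exc:
        print(f"Job will not run: {exc}")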
dcnum/meta/ppid.py CHANGED
@@ -7,10 +7,11 @@ import pathlib
  from typing import Dict, List, Protocol
  import warnings
 
+ import numpy as np
 
  #: Increment this string if there are breaking changes that make
  #: previous pipelines unreproducible.
- DCNUM_PPID_GENERATION = "10"
+ DCNUM_PPID_GENERATION = "11"
 
 
  class ClassWithPPIDCapabilities(Protocol):
@@ -140,9 +141,9 @@ def kwargs_to_ppid(cls: ClassWithPPIDCapabilities,
          path = pathlib.Path(val)
          if path.exists():
              val = path.name
-         if isinstance(val, bool):
+         if isinstance(val, (bool, np.bool_)):
              val = int(val)  # do not print e.g. "True"
-         elif isinstance(val, float):
+         elif isinstance(val, (float, np.floating)):
              if val == int(val):
                  val = int(val)  # omit the ".0" at the end
          concat_strings.append(f"{abr}={val}")
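Why the NumPy types matter: `np.bool_` is not an instance of `bool`, and `np.float32` is not an instance of `float` (`np.float64` happens to subclass `float`), so before this change such values were rendered verbatim, e.g. as "True", in the pipeline identifier. A quick demonstration of the normalization:

    import numpy as np

    for val in [True, np.True_, np.float32(3.0), 0.5]:
        if isinstance(val, (bool, np.bool_)):
            val = int(val)          # do not print e.g. "True"
        elif isinstance(val, (float, np.floating)):
            if val == int(val):
                val = int(val)      # omit the ".0" at the end
        print(f"k={val}")
    # prints: k=1, k=1, k=3, k=0.5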
dcnum/read/__init__.py CHANGED
@@ -1,5 +1,6 @@
  # flake8: noqa: F401
  from .cache import md5sum
  from .const import PROTECTED_FEATURES
+ from .detect_flicker import detect_flickering
  from .hdf5_data import HDF5Data, HDF5ImageCache, concatenated_hdf5_data
  from .mapped import get_mapping_indices, get_mapped_object
dcnum/read/cache.py CHANGED
@@ -36,9 +36,10 @@ class BaseImageChunkCache(abc.ABC):
      def __getitem__(self, index):
          if isinstance(index, (slice, list, np.ndarray)):
              if isinstance(index, slice):
-                 indices = np.arange(index.start or 0,
-                                     index.stop or len(self),
-                                     index.step)
+                 indices = np.arange(
+                     index.start or 0,
+                     min(index.stop, len(self)) if index.stop else len(self),
+                     index.step)
              else:
                  indices = index
              array_out = np.empty((len(indices),) + self.image_shape,
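The fix clamps the slice stop to the cache length, so an over-long slice no longer produces out-of-range indices. A standalone illustration of the new index computation:

    import numpy as np

    length = 5              # pretend len(cache) == 5
    index = slice(0, 100)   # caller asks for more items than exist
    indices = np.arange(index.start or 0,
                        min(index.stop, length) if index.stop else length,
                        index.step)
    print(indices)  # [0 1 2 3 4]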