dcnum 0.18.0__tar.gz → 0.19.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104)
  1. {dcnum-0.18.0 → dcnum-0.19.1}/CHANGELOG +6 -0
  2. {dcnum-0.18.0/src/dcnum.egg-info → dcnum-0.19.1}/PKG-INFO +1 -1
  3. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/_version.py +2 -2
  4. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_background/bg_roll_median.py +3 -2
  5. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/logic/ctrl.py +12 -19
  6. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/read/hdf5_data.py +44 -2
  7. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/read/mapped.py +1 -3
  8. {dcnum-0.18.0 → dcnum-0.19.1/src/dcnum.egg-info}/PKG-INFO +1 -1
  9. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_logic_pipeline.py +230 -24
  10. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_ppid_base.py +2 -2
  11. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_ppid_data.py +17 -2
  12. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_read_hdf5.py +22 -3
  13. {dcnum-0.18.0 → dcnum-0.19.1}/.github/workflows/check.yml +0 -0
  14. {dcnum-0.18.0 → dcnum-0.19.1}/.github/workflows/deploy_pypi.yml +0 -0
  15. {dcnum-0.18.0 → dcnum-0.19.1}/.gitignore +0 -0
  16. {dcnum-0.18.0 → dcnum-0.19.1}/.readthedocs.yml +0 -0
  17. {dcnum-0.18.0 → dcnum-0.19.1}/LICENSE +0 -0
  18. {dcnum-0.18.0 → dcnum-0.19.1}/README.rst +0 -0
  19. {dcnum-0.18.0 → dcnum-0.19.1}/docs/conf.py +0 -0
  20. {dcnum-0.18.0 → dcnum-0.19.1}/docs/extensions/github_changelog.py +0 -0
  21. {dcnum-0.18.0 → dcnum-0.19.1}/docs/index.rst +0 -0
  22. {dcnum-0.18.0 → dcnum-0.19.1}/docs/requirements.txt +0 -0
  23. {dcnum-0.18.0 → dcnum-0.19.1}/pyproject.toml +0 -0
  24. {dcnum-0.18.0 → dcnum-0.19.1}/setup.cfg +0 -0
  25. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/__init__.py +0 -0
  26. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/__init__.py +0 -0
  27. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/event_extractor_manager_thread.py +0 -0
  28. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_background/__init__.py +0 -0
  29. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_background/base.py +0 -0
  30. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_background/bg_copy.py +0 -0
  31. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_background/bg_sparse_median.py +0 -0
  32. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_brightness/__init__.py +0 -0
  33. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_brightness/bright_all.py +0 -0
  34. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_brightness/common.py +0 -0
  35. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_contour/__init__.py +0 -0
  36. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_contour/contour.py +0 -0
  37. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_contour/moments.py +0 -0
  38. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_contour/volume.py +0 -0
  39. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_texture/__init__.py +0 -0
  40. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_texture/common.py +0 -0
  41. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_texture/tex_all.py +0 -0
  42. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/gate.py +0 -0
  43. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/queue_event_extractor.py +0 -0
  44. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/logic/__init__.py +0 -0
  45. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/logic/job.py +0 -0
  46. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/logic/json_encoder.py +0 -0
  47. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/meta/__init__.py +0 -0
  48. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/meta/paths.py +0 -0
  49. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/meta/ppid.py +0 -0
  50. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/read/__init__.py +0 -0
  51. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/read/cache.py +0 -0
  52. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/read/const.py +0 -0
  53. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/segm/__init__.py +0 -0
  54. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/segm/segm_thresh.py +0 -0
  55. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/segm/segmenter.py +0 -0
  56. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/segm/segmenter_cpu.py +0 -0
  57. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/segm/segmenter_gpu.py +0 -0
  58. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/segm/segmenter_manager_thread.py +0 -0
  59. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/write/__init__.py +0 -0
  60. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/write/deque_writer_thread.py +0 -0
  61. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/write/queue_collector_thread.py +0 -0
  62. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/write/writer.py +0 -0
  63. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum.egg-info/SOURCES.txt +0 -0
  64. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum.egg-info/dependency_links.txt +0 -0
  65. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum.egg-info/requires.txt +0 -0
  66. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum.egg-info/top_level.txt +0 -0
  67. {dcnum-0.18.0 → dcnum-0.19.1}/tests/conftest.py +0 -0
  68. {dcnum-0.18.0 → dcnum-0.19.1}/tests/data/fmt-hdf5_cytoshot_extended-moments-features.zip +0 -0
  69. {dcnum-0.18.0 → dcnum-0.19.1}/tests/data/fmt-hdf5_cytoshot_full-features_2023.zip +0 -0
  70. {dcnum-0.18.0 → dcnum-0.19.1}/tests/data/fmt-hdf5_cytoshot_full-features_2024.zip +0 -0
  71. {dcnum-0.18.0 → dcnum-0.19.1}/tests/data/fmt-hdf5_cytoshot_full-features_legacy_allev_2023.zip +0 -0
  72. {dcnum-0.18.0 → dcnum-0.19.1}/tests/data/fmt-hdf5_shapein_empty.zip +0 -0
  73. {dcnum-0.18.0 → dcnum-0.19.1}/tests/data/fmt-hdf5_shapein_raw-with-variable-length-logs.zip +0 -0
  74. {dcnum-0.18.0 → dcnum-0.19.1}/tests/helper_methods.py +0 -0
  75. {dcnum-0.18.0 → dcnum-0.19.1}/tests/requirements.txt +0 -0
  76. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_background_base.py +0 -0
  77. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_background_bg_copy.py +0 -0
  78. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_background_bg_roll_median.py +0 -0
  79. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_background_bg_sparsemed.py +0 -0
  80. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_brightness.py +0 -0
  81. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_event_extractor_manager.py +0 -0
  82. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_gate.py +0 -0
  83. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_haralick.py +0 -0
  84. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_moments_based.py +0 -0
  85. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_moments_based_extended.py +0 -0
  86. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_volume.py +0 -0
  87. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_init.py +0 -0
  88. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_logic_job.py +0 -0
  89. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_logic_join.py +0 -0
  90. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_logic_json.py +0 -0
  91. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_paths.py +0 -0
  92. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_ppid_bg.py +0 -0
  93. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_ppid_feat.py +0 -0
  94. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_ppid_gate.py +0 -0
  95. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_ppid_segm.py +0 -0
  96. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_read_basin.py +0 -0
  97. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_read_concat_hdf5.py +0 -0
  98. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_read_hdf5_index_mapping.py +0 -0
  99. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_segm_base.py +0 -0
  100. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_segm_no_mask_proc.py +0 -0
  101. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_segm_thresh.py +0 -0
  102. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_write_deque_writer_thread.py +0 -0
  103. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_write_queue_collector_thread.py +0 -0
  104. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_write_writer.py +0 -0
{dcnum-0.18.0 → dcnum-0.19.1}/CHANGELOG

@@ -1,3 +1,9 @@
+ 0.19.1
+ - enh: support steps when specifying data slices in `index_mapping`
+ 0.19.0
+ - enh: elevate `HDF5Data`s `index_mapping` to pipeline identifier status
+   (this changes the pipeline identifier)
+ - enh: improve sanity checks for `BackgroundRollMed`
  0.18.0
  - BREAKING CHANGE: mask postprocessing did a morphological opening instead
    of a morphological closing, failing to remove spurious noise
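The first entry above is the substance of the 0.19.1 patch release: `index_mapping` now also accepts slices with a step. A minimal sketch of what this enables, assuming an `.rtdc` input file that provides a `deform` feature (file name and feature are illustrative only):

    from dcnum import read

    # map only every second event between indices 1 and 10 into the pipeline
    with read.HDF5Data("input.rtdc", index_mapping=slice(1, 10, 2)) as hd:
        print(len(hd["deform"]))  # 5 mapped events (indices 1, 3, 5, 7, 9)
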
{dcnum-0.18.0/src/dcnum.egg-info → dcnum-0.19.1}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dcnum
- Version: 0.18.0
+ Version: 0.19.1
  Summary: numerics toolbox for imaging deformability cytometry
  Author: Maximilian Schlögel, Paul Müller
  Maintainer-email: Paul Müller <dev@craban.de>
{dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/_version.py

@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.18.0'
- __version_tuple__ = version_tuple = (0, 18, 0)
+ __version__ = version = '0.19.1'
+ __version_tuple__ = version_tuple = (0, 19, 1)
{dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_background/bg_roll_median.py

@@ -119,7 +119,7 @@ class BackgroundRollMed(Background):
  """Check user-defined properties of this class

  This method primarily exists so that the CLI knows which
- keyword arguements can be passed to this class.
+ keyword arguments can be passed to this class.

  Parameters
  ----------
@@ -132,7 +132,8 @@ class BackgroundRollMed(Background):
  `kernel_size` will not increase computation speed. Larger
  values lead to a higher memory consumption.
  """
- assert kernel_size > 0
+ assert kernel_size > 0, "kernel size must be positive number"
+ assert kernel_size % 2 == 0, "kernel size must be even number"
  assert batch_size > kernel_size

  def get_slices_for_batch(self, batch_index=0):
{dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/logic/ctrl.py

@@ -1,11 +1,9 @@
  import collections
  import datetime
- import hashlib
  import json
  import logging
  from logging.handlers import QueueListener
  import multiprocessing as mp
- import numbers
  import os
  import pathlib
  import platform
@@ -16,7 +14,6 @@ import traceback
  import uuid

  import h5py
- import numpy as np

  from ..feat.feat_background.base import get_available_background_methods
  from ..feat.queue_event_extractor import QueueEventExtractor
@@ -313,7 +310,17 @@ class DCNumJobRunner(threading.Thread):
  # Whether pipeline hash is invalid.
  ppid.compute_pipeline_hash(**datdict) != dathash
  # Whether the input file is the original output of the pipeline.
- or len(self.draw) != evyield)
+ or len(self.draw) != evyield
+ # If index mapping is defined, then we always redo the pipeline.
+ # If the pipeline hashes are identical and index mapping is not
+ # None, then both pipelines were done with index mapping.
+ # But applying the same pipeline with index mapping in series
+ # will lead to a different result in the second run (e.g. 1st
+ # pipeline run: take every 2nd event; 2nd pipeline run: take
+ # every second event -> results in every 4th event in output of
+ # second pipeline run).
+ or self.draw.index_mapping is not None
+ )
  # Do we have to recompute the background data? In addition to the
  # hash sanity check above, check the generation, input data,
  # and background pipeline identifiers.
@@ -387,21 +394,7 @@ class DCNumJobRunner(threading.Thread):
  hw.h5.attrs["pipeline:dcnum yield"] = self.event_count
  # index mapping information
  im = self.job.kwargs["data_kwargs"].get("index_mapping", None)
- if im is None:
- dim = "0"
- elif isinstance(im, numbers.Number):
- dim = f"{im}"
- elif isinstance(im, slice):
- dim = (f"{im.start if im.start is not None else 'n'}"
- + f"-{im.stop if im.stop is not None else 'n'}"
- + f"-{im.step if im.step is not None else 'n'}"
- )
- elif isinstance(im, (list, np.ndarray)):
- idhash = hashlib.md5(
- np.array(im, dtype=np.uint32).tobytes()).hexdigest()
- dim = f"h-{idhash[:8]}"
- else:
- dim = "unknown"
+ dim = HDF5Data.get_ppid_index_mapping(im)
  hw.h5.attrs["pipeline:dcnum mapping"] = dim
  # regular metadata
  hw.h5.attrs["experiment:event count"] = self.event_count
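The comment block added above explains why a non-None `index_mapping` always forces a fresh pipeline run: re-applying the same mapping to an already-mapped output compounds the selection. A short NumPy illustration of that compounding, independent of dcnum:

    import numpy as np

    events = np.arange(12)
    first_run = events[::2]      # 1st run: take every 2nd event -> [0, 2, 4, 6, 8, 10]
    second_run = first_run[::2]  # 2nd run, same mapping          -> [0, 4, 8]
    # chaining the identical mapping ends up selecting every 4th original event
    assert np.array_equal(second_run, events[::4])
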
{dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/read/hdf5_data.py

@@ -1,7 +1,9 @@
  from __future__ import annotations

+ import hashlib
  import io
  import json
+ import numbers
  import pathlib
  import tempfile
  from typing import Dict, BinaryIO, List
@@ -293,7 +295,9 @@ class HDF5Data:
  self.h5.close()

  def get_ppid(self):
- return self.get_ppid_from_ppkw({"pixel_size": self.pixel_size})
+ return self.get_ppid_from_ppkw(
+ {"pixel_size": self.pixel_size,
+ "index_mapping": self.index_mapping})

  @classmethod
  def get_ppid_code(cls):
@@ -304,10 +308,34 @@ class HDF5Data:
  # Data does not really fit into the PPID scheme we use for the rest
  # of the pipeline. This implementation here is custom.
  code = cls.get_ppid_code()
+ # pixel size
  ppid_ps = f"{kwargs['pixel_size']:.8f}".rstrip("0")
- kwid = "^".join([f"p={ppid_ps}"])
+ # index mapping
+ ppid_im = cls.get_ppid_index_mapping(kwargs.get("index_mapping", None))
+ kwid = "^".join([f"p={ppid_ps}", f"i={ppid_im}"])
  return ":".join([code, kwid])

+ @staticmethod
+ def get_ppid_index_mapping(index_mapping):
+ """Return the pipeline identifier part for index mapping"""
+ im = index_mapping
+ if im is None:
+ dim = "0"
+ elif isinstance(im, numbers.Integral):
+ dim = f"{im}"
+ elif isinstance(im, slice):
+ dim = (f"{im.start if im.start is not None else 'n'}"
+ + f"-{im.stop if im.stop is not None else 'n'}"
+ + f"-{im.step if im.step is not None else 'n'}"
+ )
+ elif isinstance(im, (list, np.ndarray)):
+ idhash = hashlib.md5(
+ np.array(im, dtype=np.uint32).tobytes()).hexdigest()
+ dim = f"h-{idhash[:8]}"
+ else:
+ dim = "unknown"
+ return dim
+
  @staticmethod
  def get_ppkw_from_ppid(dat_ppid):
  # Data does not fit in the PPID scheme we use, but we still
@@ -321,6 +349,20 @@ class HDF5Data:
  var, val = item.split("=")
  if var == "p":
  kwargs["pixel_size"] = float(val)
+ elif var == "i":
+ if val.startswith("h-") or val == "unknown":
+ raise ValueError(f"Cannot invert index mapping {val}")
+ elif val == "0":
+ kwargs["index_mapping"] = None
+ elif val.count("-"):
+ start, stop, step = val.split("-")
+ kwargs["index_mapping"] = slice(
+ None if start == "n" else int(start),
+ None if stop == "n" else int(stop),
+ None if step == "n" else int(step)
+ )
+ else:
+ kwargs["index_mapping"] = int(val)
  else:
  raise ValueError(f"Invalid parameter '{var}'!")
  return kwargs
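Together, `get_ppid_index_mapping` and the extended `get_ppkw_from_ppid` shown above form a round trip between `index_mapping` values and the new `i=` field of the data pipeline identifier. A minimal sketch based on the code above (import path as used in the package's test suite; hashed list mappings are the one case that cannot be inverted and raise `ValueError`):

    from dcnum.read import HDF5Data

    assert HDF5Data.get_ppid_index_mapping(None) == "0"
    assert HDF5Data.get_ppid_index_mapping(slice(10, 20, 2)) == "10-20-2"

    # decoding restores both keyword arguments from the identifier string
    kwargs = HDF5Data.get_ppkw_from_ppid("hdf:p=0.34^i=10-20-2")
    assert kwargs["pixel_size"] == 0.34
    assert kwargs["index_mapping"] == slice(10, 20, 2)
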
{dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/read/mapped.py

@@ -54,12 +54,10 @@ def _get_mapping_indices_cached(
  return np.arange(index_mapping)
  elif isinstance(index_mapping, tuple):
  im_slice = slice(*index_mapping)
- if im_slice.step is not None:
- raise NotImplementedError("Slices with step not implemented yet")
  if im_slice.stop is None or im_slice.start is None:
  raise NotImplementedError(
  "Slices must have start and stop defined")
- return np.arange(im_slice.start, im_slice.stop)
+ return np.arange(im_slice.start, im_slice.stop, im_slice.step)
  elif isinstance(index_mapping, list):
  return np.array(index_mapping, dtype=np.uint32)
  else:
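With the step restriction removed, a stepped slice now expands to the expected index array. A small sketch using `read.get_mapping_indices`, the public helper the old test suite called; accepting a bare slice and mirroring `np.arange` is assumed here:

    import numpy as np
    from dcnum import read

    indices = read.get_mapping_indices(slice(1, 10, 2))
    assert np.array_equal(indices, np.arange(1, 10, 2))  # [1, 3, 5, 7, 9]
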
{dcnum-0.18.0 → dcnum-0.19.1/src/dcnum.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dcnum
- Version: 0.18.0
+ Version: 0.19.1
  Summary: numerics toolbox for imaging deformability cytometry
  Author: Maximilian Schlögel, Paul Müller
  Maintainer-email: Paul Müller <dev@craban.de>
{dcnum-0.18.0 → dcnum-0.19.1}/tests/test_logic_pipeline.py

@@ -59,17 +59,12 @@ def test_chained_pipeline():
  == "sparsemed:k=250^s=1^t=0^f=0.8^o=1"


- @pytest.mark.parametrize("index_mapping,size,mapping_out", [
- (None, 395, "0"),
- (5, 11, "5"),
- (slice(3, 5, None), 6, "3-5-n"),
- ([3, 5, 6, 7], 7, "h-6e582938"),
- ])
- def test_duplicate_pipeline(index_mapping, size, mapping_out):
+ def test_duplicate_pipeline():
  """Test running the same pipeline twice

- When the pipeline is run on a file with the same pipeline
- identifier, data are just copied over. Nothing much fancy else.
+ When the pipeline is run on a file that has been run with the same
+ pipeline identifier, then we do not run the pipeline. Instead, we
+ copy the data from the first file.
  """
  path_orig = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
  path = path_orig.with_name("input.rtdc")
@@ -79,13 +74,12 @@ def test_duplicate_pipeline(index_mapping, size, mapping_out):
  job = logic.DCNumPipelineJob(
  path_in=path,
  path_out=path2,
- data_kwargs={"index_mapping": index_mapping},
  background_code="copy",
  segmenter_code="thresh",
  segmenter_kwargs={"thresh": -6,
  "kwargs_mask": {"closing_disk": 0}},
  debug=True)
- assert job.kwargs["data_kwargs"]["index_mapping"] == index_mapping
+ assert job.kwargs["data_kwargs"].get("index_mapping") is None

  # perform the initial pipeline
  with logic.DCNumJobRunner(job=job) as runner:
@@ -102,16 +96,12 @@ def test_duplicate_pipeline(index_mapping, size, mapping_out):

  # get the first image for reference
  with h5py.File(path) as h5:
- if index_mapping is None:
- idx0 = 0
- else:
- idx0 = read.get_mapping_indices(index_mapping)[0]
- im0 = h5["/events/image"][idx0]
+ im0 = h5["/events/image"][0]

  # remove all logs just to be sure nothing interferes
  with h5py.File(path2, "a") as h5:
- assert h5.attrs["pipeline:dcnum mapping"] == mapping_out
- assert len(h5["events/deform"]) == size
+ assert h5.attrs["pipeline:dcnum mapping"] == "0"
+ assert len(h5["events/deform"]) == 395
  del h5["logs"]

  # now when we do everything again, not a thing should be done
@@ -140,11 +130,227 @@ def test_duplicate_pipeline(index_mapping, size, mapping_out):
  assert "deform" in h5["events"]
  assert "image" in h5["events"]
  assert "image_bg" in h5["events"]
- assert len(h5["events/deform"]) == size
+ assert len(h5["events/deform"]) == 395
  assert h5.attrs["pipeline:dcnum mapping"] == "0"
  assert np.all(h5["events/image"][0] == im0)


+ def test_duplicate_pipeline_redo_index_mapping():
+ """Test running the same pipeline twice
+
+ When the pipeline is run on a file that has been run with the same
+ pipeline identifier, then we do not run the pipeline. Instead, we
+ copy the data from the first file.
+
+ However, if something is odd, such as index mapping defined in the
+ pipeline then redo the computations.
+ This is the purpose of this test.
+ """
+ path_orig = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
+ path = path_orig.with_name("input.rtdc")
+ path2 = path.with_name("path_intermediate.rtdc")
+ with read.concatenated_hdf5_data(5 * [path_orig], path_out=path):
+ pass
+ job = logic.DCNumPipelineJob(
+ path_in=path,
+ path_out=path2,
+ data_kwargs={"index_mapping": 10},
+ background_code="copy",
+ segmenter_code="thresh",
+ segmenter_kwargs={"thresh": -6,
+ "kwargs_mask": {"closing_disk": 0}},
+ debug=True)
+ assert job.kwargs["data_kwargs"].get("index_mapping") == 10
+
+ # perform the initial pipeline
+ with logic.DCNumJobRunner(job=job) as runner:
+ runner.run()
+ # Sanity checks for initial job
+ with read.HDF5Data(job["path_out"]) as hd:
+ # Check the logs
+ logdat = " ".join(get_log(hd, time.strftime("dcnum-log-%Y")))
+ assert "Starting background computation" in logdat
+ assert "Finished background computation" in logdat
+ assert "Starting segmentation and feature extraction" in logdat
+ assert "Flushing data to disk" in logdat
+ assert "Finished segmentation and feature extraction" in logdat
+
+ with h5py.File(path2, "a") as h5:
+ # sanity checks
+ assert h5.attrs["pipeline:dcnum mapping"] == "10"
+ assert len(h5["events/deform"]) == 24
+ assert h5.attrs["pipeline:dcnum yield"] == 24
+ # remove all logs just to be sure nothing interferes
+ del h5["logs"]
+ # Modify the yield, triggering a new pipeline run
+ h5.attrs["pipeline:dcnum yield"] = 111111
+
+ # now when we do everything again, not a thing should be done
+ job2 = logic.DCNumPipelineJob(
+ path_in=path2,
+ path_out=path2.with_name("final_out.rtdc"),
+ no_basins_in_output=True,
+ data_kwargs={"index_mapping": 10},
+ background_code="copy",
+ segmenter_code="thresh",
+ segmenter_kwargs={"thresh": -6,
+ "kwargs_mask": {"closing_disk": 0}},
+ debug=True)
+ with logic.DCNumJobRunner(job=job2) as runner2:
+ runner2.run()
+ # Real check for second run (not the `not`s [sic]!)
+ with read.HDF5Data(job2["path_out"]) as hd:
+ # Check the logs
+ logdat = " ".join(get_log(hd, time.strftime("dcnum-log-%Y")))
+ # Background computation is not repeated
+ assert "Starting background computation" not in logdat
+ assert "Finished background computation" not in logdat
+ # Segmentation is repeated
+ assert "Starting segmentation and feature extraction" in logdat
+ assert "Flushing data to disk" in logdat
+ assert "Finished segmentation and feature extraction" in logdat
+
+ with h5py.File(job2["path_out"]) as h5:
+ assert "deform" in h5["events"]
+ assert "image" in h5["events"]
+ assert "image_bg" in h5["events"]
+ # We have not 24 here, because the index mapping enumerates events,
+ # not frames.
+ assert len(h5["events/deform"]) == 11
+ assert h5.attrs["pipeline:dcnum mapping"] == "10"
+ assert h5.attrs["pipeline:dcnum yield"] == 11
+
+
+ def test_duplicate_pipeline_redo_yield():
+ """Test running the same pipeline twice
+
+ When the pipeline is run on a file that has been run with the same
+ pipeline identifier, then we do not run the pipeline. Instead, we
+ copy the data from the first file.
+
+ However, if something is odd, such as the yield of the pipeline not
+ matching the data in the output file, then redo the computations.
+ This is the purpose of this test.
+ """
+ path_orig = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
+ path = path_orig.with_name("input.rtdc")
+ path2 = path.with_name("path_intermediate.rtdc")
+ with read.concatenated_hdf5_data(5 * [path_orig], path_out=path):
+ pass
+ job = logic.DCNumPipelineJob(
+ path_in=path,
+ path_out=path2,
+ background_code="copy",
+ segmenter_code="thresh",
+ segmenter_kwargs={"thresh": -6,
+ "kwargs_mask": {"closing_disk": 0}},
+ debug=True)
+ assert job.kwargs["data_kwargs"].get("index_mapping") is None
+
+ # perform the initial pipeline
+ with logic.DCNumJobRunner(job=job) as runner:
+ runner.run()
+ # Sanity checks for initial job
+ with read.HDF5Data(job["path_out"]) as hd:
+ # Check the logs
+ logdat = " ".join(get_log(hd, time.strftime("dcnum-log-%Y")))
+ assert "Starting background computation" in logdat
+ assert "Finished background computation" in logdat
+ assert "Starting segmentation and feature extraction" in logdat
+ assert "Flushing data to disk" in logdat
+ assert "Finished segmentation and feature extraction" in logdat
+
+ with h5py.File(path2, "a") as h5:
+ # sanity checks
+ assert h5.attrs["pipeline:dcnum mapping"] == "0"
+ assert len(h5["events/deform"]) == 395
+ assert h5.attrs["pipeline:dcnum yield"] == 395
+ # remove all logs just to be sure nothing interferes
+ del h5["logs"]
+ # Modify the yield, triggering a new pipeline run
+ h5.attrs["pipeline:dcnum yield"] = 111111
+
+ # now when we do everything again, not a thing should be done
+ job2 = logic.DCNumPipelineJob(
+ path_in=path2,
+ path_out=path2.with_name("final_out.rtdc"),
+ no_basins_in_output=True,
+ background_code="copy",
+ segmenter_code="thresh",
+ segmenter_kwargs={"thresh": -6,
+ "kwargs_mask": {"closing_disk": 0}},
+ debug=True)
+ with logic.DCNumJobRunner(job=job2) as runner2:
+ runner2.run()
+ # Real check for second run (not the `not`s [sic]!)
+ with read.HDF5Data(job2["path_out"]) as hd:
+ # Check the logs
+ logdat = " ".join(get_log(hd, time.strftime("dcnum-log-%Y")))
+ # Background computation is not repeated
+ assert "Starting background computation" not in logdat
+ assert "Finished background computation" not in logdat
+ # Segmentation is repeated
+ assert "Starting segmentation and feature extraction" in logdat
+ assert "Flushing data to disk" in logdat
+ assert "Finished segmentation and feature extraction" in logdat
+
+ with h5py.File(job2["path_out"]) as h5:
+ assert "deform" in h5["events"]
+ assert "image" in h5["events"]
+ assert "image_bg" in h5["events"]
+ assert len(h5["events/deform"]) == 395
+ assert h5.attrs["pipeline:dcnum mapping"] == "0"
+ assert h5.attrs["pipeline:dcnum yield"] == 395
+
+
+ @pytest.mark.parametrize("index_mapping,size,mapping_out", [
+ (5, 11, "5"),
+ (slice(3, 5, None), 6, "3-5-n"),
+ ([3, 5, 6, 7], 7, "h-6e582938"),
+ ])
+ def test_index_mapping_pipeline(index_mapping, size, mapping_out):
+ """Test running the same pipeline twice
+
+ When the pipeline is run on a file with the same pipeline
+ identifier, data are just copied over. Nothing much fancy else.
+ """
+ path_orig = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
+ path = path_orig.with_name("input.rtdc")
+ path2 = path.with_name("path_intermediate.rtdc")
+ with read.concatenated_hdf5_data(5 * [path_orig], path_out=path):
+ pass
+ job = logic.DCNumPipelineJob(
+ path_in=path,
+ path_out=path2,
+ data_kwargs={"index_mapping": index_mapping},
+ background_code="copy",
+ segmenter_code="thresh",
+ segmenter_kwargs={"thresh": -6,
+ "kwargs_mask": {"closing_disk": 0}},
+ debug=True)
+ assert job.kwargs["data_kwargs"]["index_mapping"] == index_mapping
+
+ # perform the initial pipeline
+ with logic.DCNumJobRunner(job=job) as runner:
+ runner.run()
+ # Sanity checks for initial job
+ with read.HDF5Data(job["path_out"]) as hd:
+ # Check the logs
+ logdat = " ".join(get_log(hd, time.strftime("dcnum-log-%Y")))
+ assert "Starting background computation" in logdat
+ assert "Finished background computation" in logdat
+ assert "Starting segmentation and feature extraction" in logdat
+ assert "Flushing data to disk" in logdat
+ assert "Finished segmentation and feature extraction" in logdat
+
+ with h5py.File(job["path_out"]) as h5:
+ assert "deform" in h5["events"]
+ assert "image" in h5["events"]
+ assert "image_bg" in h5["events"]
+ assert len(h5["events/deform"]) == size
+ assert h5.attrs["pipeline:dcnum mapping"] == mapping_out
+
+
  def test_duplicate_transfer_basin_data():
  """task_transfer_basin_data should not copy basin data from input"""
  path_orig = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
@@ -335,7 +541,7 @@ def test_simple_pipeline(debug):

  # this is the default pipeline
  gen_id = ppid.DCNUM_PPID_GENERATION
- dat_id = "hdf:p=0.2645"
+ dat_id = "hdf:p=0.2645^i=0"
  bg_id = "sparsemed:k=200^s=1^t=0^f=0.8^o=1"
  seg_id = "thresh:t=-6:cle=1^f=1^clo=0"
  feat_id = "legacy:b=1^h=1^v=1"
@@ -402,7 +608,7 @@ def test_simple_pipeline_no_offset_correction(debug):

  # this is the default pipeline
  gen_id = ppid.DCNUM_PPID_GENERATION
- dat_id = "hdf:p=0.2645"
+ dat_id = "hdf:p=0.2645^i=0"
  bg_id = "sparsemed:k=200^s=1^t=0^f=0.8^o=0"
  seg_id = "thresh:t=-6:cle=1^f=1^clo=0"
  feat_id = "legacy:b=1^h=1^v=1"
@@ -474,7 +680,7 @@ def test_simple_pipeline_in_thread():
  @pytest.mark.parametrize("attr,oldval,newbg", [
  # Changes that trigger computation of new background
  ["pipeline:dcnum generation", "1", True],
- ["pipeline:dcnum data", "hdf:p=0.2656", True],
+ ["pipeline:dcnum data", "hdf:p=0.2656^i=0", True],
  ["pipeline:dcnum background", "sparsemed:k=100^s=1^t=0^f=0.8^o=1", True],
  # Changes that don't trigger background computation
  ["pipeline:dcnum segmenter", "thresh:t=-1:cle=1^f=1^clo=2", False],
@@ -505,7 +711,7 @@ def test_recomputation_of_background_metadata_changed(attr, oldval, newbg):

  # Set the default values
  h5.attrs["pipeline:dcnum generation"] = ppid.DCNUM_PPID_GENERATION
- h5.attrs["pipeline:dcnum data"] = "hdf:p=0.2645"
+ h5.attrs["pipeline:dcnum data"] = "hdf:p=0.2645^i=0"
  h5.attrs["pipeline:dcnum background"] = \
  "sparsemed:k=200^s=1^t=0^f=0.8^o=1"
  h5.attrs["pipeline:dcnum segmenter"] = "thresh:t=-6:cle=1^f=1^clo=2"
@@ -553,7 +759,7 @@ def test_task_background():

  # this is the default pipeline
  gen_id = ppid.DCNUM_PPID_GENERATION
- dat_id = "hdf:p=0.2645"
+ dat_id = "hdf:p=0.2645^i=0"
  bg_id = "sparsemed:k=200^s=1^t=0^f=0.8^o=1"
  seg_id = "thresh:t=-6:cle=1^f=1^clo=2"
  feat_id = "legacy:b=1^h=1^v=1"
{dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_ppid_base.py

@@ -32,13 +32,13 @@ class ExampleClass:
  def test_compute_pipeline_hash():
  pp_hash = ppid.compute_pipeline_hash(
  gen_id="7",
- dat_id="hdf:p=0.34",
+ dat_id="hdf:p=0.34^i=0",
  bg_id="sparsemed:k=200^s=1^t=0^f=0.8^o=1",
  seg_id="thresh:t=-3:cle=1^f=1^clo=2",
  feat_id="legacy:b=1^h=0^v=1",
  gate_id="norm:o=0^s=11",
  )
- assert pp_hash == "2e56aa93fcb264381c90a8fd181b3fbc"
+ assert pp_hash == "4f3a850410b9801393ab5738afe69e9a"


  @pytest.mark.parametrize("in_list,out_list", [
{dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_ppid_data.py

@@ -1,5 +1,7 @@
  from dcnum.read import HDF5Data

+ import pytest
+

  def test_ppid_decoding_dat_check_kwargs():
  dat_ppid = "hdf:p=0.2658"
@@ -7,17 +9,30 @@ def test_ppid_decoding_dat_check_kwargs():
  assert kwargs["pixel_size"] == 0.2658


+ @pytest.mark.parametrize("imppid,value", [
+ ["0", None],
+ ["10", 10],
+ ["10-20-n", slice(10, 20)],
+ ["10-20-2", slice(10, 20, 2)],
+ ["n-n-2", slice(None, None, 2)],
+ ])
+ def test_ppid_decoding_dat_check_kwargs_index_mapping(imppid, value):
+ dat_ppid = f"hdf:p=0.2658^i={imppid}"
+ kwargs = HDF5Data.get_ppkw_from_ppid(dat_ppid)
+ assert kwargs["index_mapping"] == value
+
+
  def test_ppid_encoding_dat_check_kwargs():
  kwargs = {"pixel_size": 0.34}
  ppid = HDF5Data.get_ppid_from_ppkw(kwargs)
- assert ppid == "hdf:p=0.34"
+ assert ppid == "hdf:p=0.34^i=0"


  def test_ppid_encoding_dat_check_kwargs_acc():
  # accuracy for pixel_size is 8 digits after the decimal point
  kwargs = {"pixel_size": 0.3400000036}
  ppid = HDF5Data.get_ppid_from_ppkw(kwargs)
- assert ppid == "hdf:p=0.34"
+ assert ppid == "hdf:p=0.34^i=0"


  def test_ppid_required_method_definitions():
{dcnum-0.18.0 → dcnum-0.19.1}/tests/test_read_hdf5.py

@@ -49,10 +49,10 @@ def test_get_ppid():
  "fmt-hdf5_cytoshot_full-features_legacy_allev_2023.zip")

  with read.HDF5Data(path) as hd:
- assert hd.get_ppid() == "hdf:p=0.2645"
+ assert hd.get_ppid() == "hdf:p=0.2645^i=0"

  with read.HDF5Data(path, pixel_size=0.49) as hd:
- assert hd.get_ppid() == "hdf:p=0.49"
+ assert hd.get_ppid() == "hdf:p=0.49^i=0"


  def test_get_ppkw_from_ppid_error_bad_code():
@@ -64,7 +64,7 @@ def test_get_ppkw_from_ppid_error_bad_code():
  def test_get_ppkw_from_ppid_error_bad_parameter():
  with pytest.raises(ValueError,
  match="Invalid parameter 'k'"):
- read.HDF5Data.get_ppkw_from_ppid("hdf:k=0.44")
+ read.HDF5Data.get_ppkw_from_ppid("hdf:k=0.44^i=0")


  def test_get_ppkw_from_ppid_pixel_size():
@@ -158,6 +158,25 @@ def test_image_cache_iter_chunks(size, chunks, tmp_path):
  assert list(hic.iter_chunks()) == list(range(chunks))


+ @pytest.mark.parametrize("index_mapping,result_data", [
+ [2, [0, 1]],
+ [slice(1, 10, 2), [1, 3, 5, 7, 9]],
+ [slice(1, 11, 3), [1, 4, 7, 10]],
+ [slice(10, 11), [10]],
+ [slice(1, 3, None), [1, 2]],
+ ])
+ def test_index_mapping(index_mapping, result_data):
+ path = retrieve_data(
+ "fmt-hdf5_cytoshot_full-features_legacy_allev_2023.zip")
+ with h5py.File(path, "a") as h5:
+ size = len(h5["events/image"])
+ assert size == 11
+ h5["events/temp"] = np.arange(size, dtype=np.float64)
+
+ with read.HDF5Data(path, index_mapping=index_mapping) as hd:
+ assert np.allclose(hd["temp"], result_data)
+
+
  def test_keyerror_when_image_is_none(tmp_path):
  path = tmp_path / "test.hdf5"
  with h5py.File(path, "w") as hw: