dclab 0.67.0__cp314-cp314t-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dclab might be problematic. Click here for more details.

Files changed (142)
  1. dclab/__init__.py +41 -0
  2. dclab/_version.py +34 -0
  3. dclab/cached.py +97 -0
  4. dclab/cli/__init__.py +10 -0
  5. dclab/cli/common.py +237 -0
  6. dclab/cli/task_compress.py +126 -0
  7. dclab/cli/task_condense.py +223 -0
  8. dclab/cli/task_join.py +229 -0
  9. dclab/cli/task_repack.py +98 -0
  10. dclab/cli/task_split.py +154 -0
  11. dclab/cli/task_tdms2rtdc.py +186 -0
  12. dclab/cli/task_verify_dataset.py +75 -0
  13. dclab/definitions/__init__.py +79 -0
  14. dclab/definitions/feat_const.py +202 -0
  15. dclab/definitions/feat_logic.py +182 -0
  16. dclab/definitions/meta_const.py +252 -0
  17. dclab/definitions/meta_logic.py +111 -0
  18. dclab/definitions/meta_parse.py +94 -0
  19. dclab/downsampling.cpython-314t-darwin.so +0 -0
  20. dclab/downsampling.pyx +230 -0
  21. dclab/external/__init__.py +4 -0
  22. dclab/external/packaging/LICENSE +3 -0
  23. dclab/external/packaging/LICENSE.APACHE +177 -0
  24. dclab/external/packaging/LICENSE.BSD +23 -0
  25. dclab/external/packaging/__init__.py +6 -0
  26. dclab/external/packaging/_structures.py +61 -0
  27. dclab/external/packaging/version.py +505 -0
  28. dclab/external/skimage/LICENSE +28 -0
  29. dclab/external/skimage/__init__.py +2 -0
  30. dclab/external/skimage/_find_contours.py +216 -0
  31. dclab/external/skimage/_find_contours_cy.cpython-314t-darwin.so +0 -0
  32. dclab/external/skimage/_find_contours_cy.pyx +188 -0
  33. dclab/external/skimage/_pnpoly.cpython-314t-darwin.so +0 -0
  34. dclab/external/skimage/_pnpoly.pyx +99 -0
  35. dclab/external/skimage/_shared/__init__.py +1 -0
  36. dclab/external/skimage/_shared/geometry.cpython-314t-darwin.so +0 -0
  37. dclab/external/skimage/_shared/geometry.pxd +6 -0
  38. dclab/external/skimage/_shared/geometry.pyx +55 -0
  39. dclab/external/skimage/measure.py +7 -0
  40. dclab/external/skimage/pnpoly.py +53 -0
  41. dclab/external/statsmodels/LICENSE +35 -0
  42. dclab/external/statsmodels/__init__.py +6 -0
  43. dclab/external/statsmodels/nonparametric/__init__.py +1 -0
  44. dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
  45. dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
  46. dclab/external/statsmodels/nonparametric/kernels.py +36 -0
  47. dclab/features/__init__.py +9 -0
  48. dclab/features/bright.py +81 -0
  49. dclab/features/bright_bc.py +93 -0
  50. dclab/features/bright_perc.py +63 -0
  51. dclab/features/contour.py +161 -0
  52. dclab/features/emodulus/__init__.py +339 -0
  53. dclab/features/emodulus/load.py +252 -0
  54. dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
  55. dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
  56. dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
  57. dclab/features/emodulus/pxcorr.py +135 -0
  58. dclab/features/emodulus/scale_linear.py +247 -0
  59. dclab/features/emodulus/viscosity.py +260 -0
  60. dclab/features/fl_crosstalk.py +95 -0
  61. dclab/features/inert_ratio.py +377 -0
  62. dclab/features/volume.py +242 -0
  63. dclab/http_utils.py +322 -0
  64. dclab/isoelastics/__init__.py +468 -0
  65. dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
  66. dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
  67. dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
  68. dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
  69. dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
  70. dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
  71. dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
  72. dclab/kde/__init__.py +1 -0
  73. dclab/kde/base.py +459 -0
  74. dclab/kde/contours.py +222 -0
  75. dclab/kde/methods.py +313 -0
  76. dclab/kde_contours.py +10 -0
  77. dclab/kde_methods.py +11 -0
  78. dclab/lme4/__init__.py +5 -0
  79. dclab/lme4/lme4_template.R +94 -0
  80. dclab/lme4/rsetup.py +204 -0
  81. dclab/lme4/wrapr.py +386 -0
  82. dclab/polygon_filter.py +398 -0
  83. dclab/rtdc_dataset/__init__.py +15 -0
  84. dclab/rtdc_dataset/check.py +902 -0
  85. dclab/rtdc_dataset/config.py +533 -0
  86. dclab/rtdc_dataset/copier.py +353 -0
  87. dclab/rtdc_dataset/core.py +896 -0
  88. dclab/rtdc_dataset/export.py +867 -0
  89. dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
  90. dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
  91. dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
  92. dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
  93. dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
  94. dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
  95. dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
  96. dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
  97. dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
  98. dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
  99. dclab/rtdc_dataset/feat_basin.py +762 -0
  100. dclab/rtdc_dataset/feat_temp.py +102 -0
  101. dclab/rtdc_dataset/filter.py +263 -0
  102. dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
  103. dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
  104. dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
  105. dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
  106. dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
  107. dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
  108. dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
  109. dclab/rtdc_dataset/fmt_dict.py +103 -0
  110. dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
  111. dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
  112. dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
  113. dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
  114. dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
  115. dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
  116. dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
  117. dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
  118. dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
  119. dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
  120. dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
  121. dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
  122. dclab/rtdc_dataset/fmt_http.py +102 -0
  123. dclab/rtdc_dataset/fmt_s3.py +354 -0
  124. dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
  125. dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
  126. dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
  127. dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
  128. dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
  129. dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
  130. dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
  131. dclab/rtdc_dataset/load.py +77 -0
  132. dclab/rtdc_dataset/meta_table.py +25 -0
  133. dclab/rtdc_dataset/writer.py +1019 -0
  134. dclab/statistics.py +226 -0
  135. dclab/util.py +176 -0
  136. dclab/warn.py +15 -0
  137. dclab-0.67.0.dist-info/METADATA +153 -0
  138. dclab-0.67.0.dist-info/RECORD +142 -0
  139. dclab-0.67.0.dist-info/WHEEL +6 -0
  140. dclab-0.67.0.dist-info/entry_points.txt +8 -0
  141. dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
  142. dclab-0.67.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,192 @@
1
+ """RT-DC hdf5 format"""
2
+ from __future__ import annotations
3
+
4
+ import io
5
+ import json
6
+ import pathlib
7
+ from typing import Any, BinaryIO, Dict
8
+ import warnings
9
+
10
+ import h5py
11
+
12
+ from ...external.packaging import parse as parse_version
13
+ from ...util import hashobj, hashfile
14
+
15
+ from ..config import Configuration
16
+ from ..core import RTDCBase
17
+
18
+ from . import events
19
+ from . import logs
20
+ from . import tables
21
+
22
+ #: rtdc files exported with dclab prior to this version are not supported
23
+ MIN_DCLAB_EXPORT_VERSION = "0.3.3.dev2"
24
+
25
+
26
class OldFormatNotSupportedError(Exception):
    """Raised when an .rtdc file was exported with an unsupported dclab

    Files written by dclab versions older than
    ``MIN_DCLAB_EXPORT_VERSION`` trigger this error on load.

    Bugfix: derive from :class:`Exception` instead of ``BaseException``.
    PEP 8 mandates that user-defined exceptions inherit from
    ``Exception`` so that generic ``except Exception`` error handling
    (e.g. in GUI applications) can catch them; ``BaseException`` is
    reserved for exit-style exceptions such as ``KeyboardInterrupt``.
    """
    pass
28
+
29
+
30
class UnknownKeyWarning(UserWarning):
    """Warning category issued for unknown keys encountered during
    parsing of an .rtdc file."""
32
+
33
+
34
class RTDC_HDF5(RTDCBase):
    def __init__(self,
                 h5path: str | pathlib.Path | BinaryIO | io.IOBase,
                 h5kwargs: Dict[str, Any] = None,
                 *args,
                 **kwargs):
        """HDF5 file format for RT-DC measurements

        Parameters
        ----------
        h5path: str or pathlib.Path or file-like object
            Path to an '.rtdc' measurement file or a file-like object
        h5kwargs: dict
            Additional keyword arguments given to :class:`h5py.File`
        *args:
            Arguments for `RTDCBase`
        **kwargs:
            Keyword arguments for `RTDCBase`

        Attributes
        ----------
        path: pathlib.Path
            Path to the experimental HDF5 (.rtdc) file

        Raises
        ------
        OldFormatNotSupportedError
            If the file was exported with a dclab version older than
            ``MIN_DCLAB_EXPORT_VERSION``.
        """
        super(RTDC_HDF5, self).__init__(*args, **kwargs)

        # Any subclass of RTDC_HDF5 is probably a remote-type and should
        # not be able to access local basins. If you do not agree, please
        # enable this in the definition of the subclass.
        self._local_basins_allowed = self.format == "hdf5"

        # Normalize string paths; file-like objects are handed to h5py
        # unchanged. (The former no-op `else: h5path = h5path` branch
        # was removed.)
        if isinstance(h5path, (str, pathlib.Path)):
            h5path = pathlib.Path(h5path)

        self._hash = None  # lazy cache for the `hash` property
        self.path = h5path

        # Increase the read cache (which defaults to 1MiB), since
        # normally we have around 2.5MiB image chunks.
        if h5kwargs is None:
            h5kwargs = {}
        h5kwargs.setdefault("rdcc_nbytes", 10 * 1024 ** 2)
        h5kwargs.setdefault("rdcc_w0", 0)

        self.h5kwargs = h5kwargs
        self.h5file = h5py.File(h5path, **h5kwargs)

        self._events = events.H5Events(self.h5file)

        # Parse configuration
        self.config = RTDC_HDF5.parse_config(self.h5file)

        # Override the logs and tables properties with HDF5 data
        self.logs = logs.H5Logs(self.h5file)
        self.tables = tables.H5Tables(self.h5file)

        # Reject files exported by unsupported (too old) dclab versions.
        rtdc_soft = self.config["setup"].get("software version", "unknown")
        if rtdc_soft.startswith("dclab "):
            rtdc_ver = parse_version(rtdc_soft.split(" ")[1])
            if rtdc_ver < parse_version(MIN_DCLAB_EXPORT_VERSION):
                msg = "The file {} was created ".format(self.path) \
                      + "with dclab {} which is ".format(rtdc_ver) \
                      + "not supported anymore! Please rerun " \
                      + "dclab-tdms2rtdc / export the data again."
                raise OldFormatNotSupportedError(msg)

        self.title = "{} - M{}".format(
            self.config["experiment"].get("sample", "undefined sample"),
            self.config["experiment"].get("run index", "0"))

    def close(self):
        """Close the underlying HDF5 file"""
        super(RTDC_HDF5, self).close()
        self.h5file.close()

    @property
    def _h5(self):
        # deprecated private alias for the public `h5file` attribute
        warnings.warn("Access to the underlying HDF5 file is now public. "
                      "Please use the `h5file` attribute instead of `_h5`!",
                      DeprecationWarning)
        return self.h5file

    @staticmethod
    def can_open(h5path):
        """Check whether a given file is in the .rtdc file format"""
        h5path = pathlib.Path(h5path)
        if h5path.suffix == ".rtdc":
            return True
        # We don't know the extension; check for the "events" group.
        canopen = False
        try:
            with h5path.open("rb") as fd:
                # Bugfix: close the h5py.File again via a context
                # manager (the handle used to be leaked here).
                with h5py.File(fd, "r") as h5:
                    canopen = "events" in h5
        except OSError:
            # not an HDF5 file (or file not readable)
            pass
        return canopen

    @staticmethod
    def parse_config(h5path):
        """Parse the RT-DC configuration of an HDF5 file

        `h5path` may be a h5py.File object or an actual path
        """
        if isinstance(h5path, h5py.File):
            h5attrs = dict(h5path.attrs)
        else:
            with h5py.File(h5path, mode="r") as fh5:
                h5attrs = dict(fh5.attrs)

        # Convert byte strings to unicode strings
        # https://github.com/h5py/h5py/issues/379
        for key in h5attrs:
            if isinstance(h5attrs[key], bytes):
                h5attrs[key] = h5attrs[key].decode("utf-8")

        config = Configuration()
        for key in h5attrs:
            # Split on the first colon only, so parameter names that
            # themselves contain ":" do not raise a ValueError.
            section, pname = key.split(":", 1)
            config[section][pname] = h5attrs[key]
        return config

    @property
    def hash(self):
        """Hash value based on file name and content"""
        if self._hash is None:
            tohash = [self.path.name,
                      # Hash a maximum of ~1MB of the hdf5 file
                      hashfile(self.path, blocksize=65536, count=20)]
            self._hash = hashobj(tohash)
        return self._hash

    def basins_get_dicts(self):
        """Return list of dicts for all basins defined in `self.h5file`"""
        return self.basin_get_dicts_from_h5file(self.h5file)

    @staticmethod
    def basin_get_dicts_from_h5file(h5file):
        """Return list of dicts for all basins defined in `h5file`"""
        basins = []
        # Do not sort anything here, sorting is done in `RTDCBase`.
        for bk in h5file.get("basins", []):
            bdat = list(h5file["basins"][bk])
            if isinstance(bdat[0], bytes):
                bdat = [bi.decode("utf") for bi in bdat]
            bdict = json.loads(" ".join(bdat))
            # remember under which key the basin was stored
            bdict["key"] = bk
            basins.append(bdict)
        return basins
@@ -0,0 +1,30 @@
1
+ """RT-DC hdf5 format"""
2
+ from __future__ import annotations
3
+
4
+ import pathlib
5
+
6
+ from .. import feat_basin
7
+
8
+ from .base import RTDC_HDF5
9
+
10
+
11
class HDF5Basin(feat_basin.Basin):
    """Basin referring to an HDF5 (.rtdc) file"""
    basin_format = "hdf5"
    basin_type = "file"

    def __init__(self, *args, **kwargs):
        # cached result of `is_available`; None means "not checked yet"
        self._available_verified = None
        super(HDF5Basin, self).__init__(*args, **kwargs)

    def _load_dataset(self, location, **kwargs):
        return RTDC_HDF5(location, **kwargs)

    def is_available(self):
        """Check (and cache) whether the basin file exists"""
        if self._available_verified is None:
            with self._av_check_lock:
                try:
                    verified = pathlib.Path(self.location).exists()
                except OSError:
                    # e.g. malformed location; leave the cache unset
                    pass
                else:
                    self._available_verified = verified
        return self._available_verified
@@ -0,0 +1,276 @@
1
+ """RT-DC hdf5 format"""
2
+ from __future__ import annotations
3
+
4
+ import pathlib
5
+ import warnings
6
+
7
+ import numbers
8
+ import numpy as np
9
+
10
+ from ... import definitions as dfn
11
+ from ...util import copy_if_needed
12
+
13
+ from . import feat_defect
14
+
15
+
16
class H5ContourEvent:
    """Access to the variable-length "contour" feature of an HDF5 file

    Each event's contour is stored as an individual dataset named
    "0", "1", ... inside the contour group (contours have varying
    numbers of points, so they cannot share one rectangular dataset).
    """

    def __init__(self, h5group, length=None):
        """
        Parameters
        ----------
        h5group: h5py.Group
            the "contour" group containing one dataset per event
        length: int or None
            number of events; if None, it is determined lazily from
            `len(h5group)` (which is slow)
        """
        self._length = length
        self.h5group = h5group
        # Assemble the identifier used for hashing in util.obj2bytes.
        # Bugfix: the original code had a trailing comma after
        # `h5group["0"].name`, accidentally turning the HDF5 path into
        # a 1-tuple inside the identifier.
        # path of the first contour within the HDF5 file
        o_name = h5group["0"].name
        # filename
        o_filename = h5group.file.filename
        _data = [o_name, o_filename]
        if pathlib.Path(o_filename).exists():
            # stat the file once (the original stat()ed it twice)
            st = pathlib.Path(o_filename).stat()
            # when the file was changed
            _data.append(st.st_mtime)
            # size of the file
            _data.append(st.st_size)
        self.identifier = _data

    def __getitem__(self, key):
        """Return one contour for integer keys, a list for slices"""
        if not isinstance(key, numbers.Integral):
            # slicing: resolve the requested indices, then fetch each
            # contour dataset individually
            indices = np.arange(len(self))[key]
            return [self.h5group[str(evid)][:] for evid in indices]
        elif key < 0:
            # negative indexing counts from the end
            return self.__getitem__(key + len(self))
        else:
            return self.h5group[str(key)][:]

    def __iter__(self):
        for idx in range(len(self)):
            yield self[idx]

    def __len__(self):
        if self._length is None:
            # computing the length of an H5Group is slow
            self._length = len(self.h5group)
        return self._length

    @property
    def dtype(self):
        return self.h5group["0"].dtype

    @property
    def shape(self):
        # the number of points per contour varies, hence np.nan
        return len(self), np.nan, 2
64
+
65
+
66
class H5Events:
    """Dict-like access to the feature data stored in an HDF5 file"""

    def __init__(self, h5):
        self.h5file = h5
        # According to https://github.com/h5py/h5py/issues/1960, we always
        # have to keep a reference to the HDF5 dataset, otherwise it will
        # be garbage-collected immediately. In addition to caching the HDF5
        # datasets, we cache the wrapping classes in `self._cached_events`.
        self._cached_events = {}
        self._defective_features = {}
        self._features_list = None

    @property
    def _features(self):
        """Sorted names of everything below the "events" group"""
        if self._features_list is None:
            feats = sorted(self.h5file["events"].keys())
            self._features_list = feats
            # an empty "trace" group does not count as a feature
            if "trace" in feats and len(self.h5file["events"]["trace"]) == 0:
                feats.remove("trace")
        return self._features_list

    def __contains__(self, key):
        return key in self.keys()

    def __getitem__(self, key):
        if key in self._cached_events:
            return self._cached_events[key]
        # user-level checking is done in core.py
        assert dfn.feature_exists(key), f"Feature '{key}' does not exist!"
        data = self.h5file["events"][key]
        if key == "contour":
            count = self.h5file.attrs.get("experiment:event count")
            wrapped = H5ContourEvent(data, length=count)
        elif key == "mask":
            wrapped = H5MaskEvent(data)
        elif key == "trace":
            wrapped = H5TraceEvent(data)
        elif data.ndim == 1:
            wrapped = H5ScalarEvent(data)
        else:
            # non-scalar features such as "image", "image_bg" and other
            # non-scalar ancillary features are handed out as-is
            wrapped = data
        self._cached_events[key] = wrapped
        return wrapped

    def __iter__(self):
        # dict-like behavior
        yield from self.keys()

    def _is_defective_feature(self, feat):
        """Whether the stored feature is defective"""
        if feat not in self._defective_features:
            defective = False
            if feat in feat_defect.DEFECTIVE_FEATURES \
                    and feat in self._features:
                # feature exists in the HDF5 file; run the workaround
                # machinery for sorting out defective features
                defective = feat_defect.DEFECTIVE_FEATURES[feat](self.h5file)
            self._defective_features[feat] = defective
        return self._defective_features[feat]

    def keys(self):
        """Returns list of valid features

        Checks for
        - defective features: whether the data in the HDF5 file is invalid
        - existing feature names: dynamic, depending on e.g. plugin features
        """
        return [key for key in self._features
                if dfn.feature_exists(key)
                and not self._is_defective_feature(key)]
142
+
143
+
144
class H5MaskEvent:
    """Cast uint8 masks to boolean"""

    def __init__(self, h5dataset):
        self.h5dataset = h5dataset
        # identifier required because "mask" is used for computation
        # of ancillary feature "contour".
        self.identifier = (self.h5dataset.file.filename, self.h5dataset.name)
        # advertised dtype; the cast happens on item access
        self.dtype = np.dtype(bool)

    def __array__(self, dtype=np.bool_, copy=copy_if_needed, *args, **kwargs):
        # One of the reasons why we implement __array__ is such that
        # the data exporter knows this object is sliceable
        # (see yield_filtered_array_stacks).
        if dtype is not np.uint8:
            warnings.warn("Please avoid calling the `__array__` method of the "
                          "`H5MaskEvent`. It may consume a lot of memory.",
                          UserWarning)
        return self.h5dataset.__array__(dtype=dtype, *args, **kwargs)

    def __getitem__(self, idx):
        # cast the stored uint8 data to boolean on access
        return np.asarray(self.h5dataset[idx], dtype=bool)

    def __iter__(self):
        return (self[ii] for ii in range(len(self)))

    def __len__(self):
        return len(self.h5dataset)

    @property
    def attrs(self):
        return self.h5dataset.attrs

    @property
    def shape(self):
        return self.h5dataset.shape

    @property
    def size(self):
        return np.prod(self.shape)
185
+
186
+
187
class H5ScalarEvent(np.lib.mixins.NDArrayOperatorsMixin):
    def __init__(self, h5ds):
        """Lazy access to a scalar feature with cache"""
        self.h5ds = h5ds
        # for hashing in util.obj2bytes
        self.identifier = (self.h5ds.file.filename, self.h5ds.name)
        self._array = None  # filled on first __array__ call
        self.ndim = 1  # matplotlib might expect this from an array
        # copy of the HDF5 attrs, doubles as cache for ufunc values
        self._ufunc_attrs = dict(self.h5ds.attrs)

    def __array__(self, dtype=None, copy=copy_if_needed, *args, **kwargs):
        if self._array is None:
            # read the data from disk only once
            self._array = np.asarray(self.h5ds, *args, **kwargs)
        return np.array(self._array, dtype=dtype, copy=copy)

    def __getitem__(self, idx):
        return self.__array__()[idx]

    def __len__(self):
        return len(self.h5ds)

    def _fetch_ufunc_attr(self, uname, ufunc):
        """A wrapper for calling functions on the scalar feature data

        The ideas are:

        1. If there is a ufunc (max/mean/min) value stored in the dataset
           attributes, then use this one.
        2. If the ufunc is computed, it is cached permanently in
           self._ufunc_attrs
        """
        value = self._ufunc_attrs.get(uname, None)
        if value is None:
            value = ufunc(self.__array__())
            self._ufunc_attrs[uname] = value
        return value

    def max(self, *args, **kwargs):
        return self._fetch_ufunc_attr("max", np.nanmax)

    def mean(self, *args, **kwargs):
        return self._fetch_ufunc_attr("mean", np.nanmean)

    def min(self, *args, **kwargs):
        return self._fetch_ufunc_attr("min", np.nanmin)

    @property
    def dtype(self):
        return self.h5ds.dtype

    @property
    def shape(self):
        return self.h5ds.shape

    @property
    def size(self):
        return len(self)
245
+
246
+
247
class H5TraceEvent:
    """Dict-like access to the "trace" group of an HDF5 file"""

    def __init__(self, h5group):
        self.h5group = h5group
        self._num_traces = None  # lazy cache for __len__
        self._shape = None  # lazy cache for shape

    def __getitem__(self, idx):
        return self.h5group[idx]

    def __contains__(self, item):
        return item in self.h5group

    def __len__(self):
        if self._num_traces is None:
            self._num_traces = len(self.h5group)
        return self._num_traces

    def __iter__(self):
        yield from sorted(self.h5group.keys())

    def keys(self):
        return self.h5group.keys()

    @property
    def shape(self):
        if self._shape is None:
            # all traces share one shape; look at the first one
            first = next(iter(self.h5group.keys()))
            self._shape = (len(self),) + tuple(self.h5group[first].shape)
        return self._shape
@@ -0,0 +1,164 @@
1
+ """RT-DC hdf5 format"""
2
+ from __future__ import annotations
3
+
4
+ from ...external.packaging import parse as parse_version
5
+
6
+
7
def get_software_version_from_h5(h5):
    """Return the "setup:software version" attribute as `str`

    Byte-string attributes are decoded to unicode; a missing
    attribute yields an empty string.
    """
    version = h5.attrs.get("setup:software version", "")
    if isinstance(version, bytes):
        version = version.decode("utf-8")
    return version
12
+
13
+
14
def is_defective_feature_aspect(h5):
    """In Shape-In 2.0.6, there was a wrong variable cast"""
    return get_software_version_from_h5(h5) in ("ShapeIn 2.0.6",
                                                "ShapeIn 2.0.7")
18
+
19
+
20
def is_defective_feature_time(h5):
    """Shape-In stores the "time" feature as a low-precision float32

    This makes time resolution for large measurements useless,
    because times are only resolved with four digits after the
    decimal point. Here, we first check whether the "frame" feature
    and the [imaging]:"frame rate" configuration are set. If so,
    then we can compute "time" as an ancillary feature which will
    be more accurate than its float32 version.
    """
    # Without the "frame" feature and a nonzero [imaging]:"frame rate"
    # the ancillary feature cannot be computed, so we must not ignore
    # even inaccurate stored information.
    if "frame" not in h5["events"]:
        return False
    if h5.attrs.get("imaging:frame rate", 0) == 0:
        return False

    # If we have a 32 bit float dataset, then things are pretty clear.
    if h5["events/time"].dtype.char[-1] == "f":
        return True

    # Consider the software
    software_version = get_software_version_from_h5(h5)

    # Only Shape-In stores false data, so we can ignore other recording
    # software.
    if not software_version.count("ShapeIn"):
        return False

    # The tricky part: dclab might have analyzed the dataset recorded by
    # Shape-In, e.g. in a compression step. Since dclab appends its version
    # string to the software_version, we just have to parse that and make
    # sure that it is above 0.47.6.
    final = software_version.split("|")[-1].strip()
    if final.startswith("dclab"):
        if parse_version(final.split()[1]) < parse_version("0.47.6"):
            # written with an older version of dclab
            return True

    # We covered all cases:
    # - ancillary information are available
    # - it's not a float32 dataset
    # - we excluded all non-Shape-In recording software
    # - it was not written with an older version of dclab
    return False
68
+
69
+
70
def is_defective_feature_volume(h5):
    """dclab computed volume wrong up until version 0.36.1"""
    # the scripted fix (dclab issue #141) leaves a log entry
    if "dclab_issue_141" in list(h5.get("logs", {}).keys()):
        return False
    # if that does not apply, check the software version
    version_string = get_software_version_from_h5(h5)
    if version_string:
        final = version_string.split("|")[-1].strip()
        if final.startswith("dclab"):
            return (parse_version(final.split()[1])
                    < parse_version("0.37.0"))
    return False
84
+
85
+
86
def is_defective_feature_inert_ratio(h5):
    """For long channels, there was an integer overflow until 0.48.1

    The problem here is that not only the channel length, but also
    the length of the contour play a role. All point coordinates of
    the contour are summed up and multiplied with one another which
    leads to integer overflows when computing mu20.

    Thus, this test is only a best guess, but still quite fast.

    See also https://github.com/DC-analysis/dclab/issues/212
    """
    # Only images wider than 500 px are affected. If this file was
    # written with dclab, then we always have the ROI size, so we
    # don't have to check the actual image.
    if h5.attrs.get("imaging:roi size x", 0) <= 500:
        return False

    # determine whether the software version was outdated
    software_version = get_software_version_from_h5(h5)
    if not software_version:
        return False
    pipeline = [v.strip() for v in software_version.split("|")]
    final = pipeline[-1]
    if not final.startswith("dclab"):
        return False
    # The fix was implemented in 0.48.2, but this method here
    # was only implemented in 0.48.3, so we might have leaked
    # old data into new files.
    return parse_version(final.split()[1]) < parse_version("0.48.3")
117
+
118
+
119
def is_defective_feature_inert_ratio_raw_cvx(h5):
    """Additional check for `inert_ratio_raw` and `inert_ratio_cvx`

    These features were computed with Shape-In and were very likely
    computed correctly.

    See https://github.com/DC-analysis/dclab/issues/224
    """
    if not is_defective_feature_inert_ratio(h5):
        # not in the affected regime
        return False

    # Possibly affected. Only return False if Shape-In check is negative
    software_version = get_software_version_from_h5(h5)
    first_version = [v.strip() for v in software_version.split("|")][0]
    if first_version.startswith("ShapeIn"):
        si_version = first_version.split()[1]
    elif "shapein-acquisition" in h5.get("logs", []):
        # Later versions of Shape-In do not anymore write "ShapeIn" in the
        # version string.
        si_version = first_version
    else:
        # Some other software was used to record the data and dclab
        # very likely stored the wrong inertia ratio.
        return True

    # We trust Shape-In >= 2.0.5
    return parse_version(si_version) < parse_version("2.0.5")
150
+
151
+
152
#: Dictionary of defective-feature checks. Each key is a feature name
#: and each value is a callable that takes the open HDF5 file and
#: returns whether the stored data for that feature are defective
#: (and should therefore not be used as-is).
DEFECTIVE_FEATURES = {
    # feature name -> check function called with the HDF5 file
    "aspect": is_defective_feature_aspect,
    "inert_ratio_cvx": is_defective_feature_inert_ratio_raw_cvx,
    "inert_ratio_prnc": is_defective_feature_inert_ratio,
    "inert_ratio_raw": is_defective_feature_inert_ratio_raw_cvx,
    "tilt": is_defective_feature_inert_ratio,
    "time": is_defective_feature_time,
    "volume": is_defective_feature_volume,
}
@@ -0,0 +1,33 @@
1
class H5Logs:
    """Dict-like access to the (non-empty) logs of an HDF5 file"""

    def __init__(self, h5):
        self.h5file = h5
        self._cache_keys = None  # lazily computed list of log names

    def __getitem__(self, key):
        if key not in self.keys():
            raise KeyError(
                f"File {self.h5file.file.filename} does not have the log "
                f"'{key}'. Available logs are {self.keys()}.")
        lines = list(self.h5file["logs"][key])
        if isinstance(lines[0], bytes):
            lines = [item.decode("utf") for item in lines]
        return lines

    def __iter__(self):
        # dict-like behavior
        yield from self.keys()

    def __len__(self):
        return len(self.keys())

    def keys(self):
        """Names of all logs that contain at least one line"""
        if self._cache_keys is None:
            self._cache_keys = [
                key for key in self.h5file.get("logs", {})
                if self.h5file["logs"][key].size]
        return self._cache_keys