PyPI - dclab - Versions diffs - 0.62.11__cp313-cp313-win_amd64.whl - Mend

dclab 0.62.11__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dclab might be problematic. Click here for more details.

Files changed (137) hide show

dclab/__init__.py +23 -0
dclab/_version.py +16 -0
dclab/cached.py +97 -0
dclab/cli/__init__.py +10 -0
dclab/cli/common.py +237 -0
dclab/cli/task_compress.py +126 -0
dclab/cli/task_condense.py +223 -0
dclab/cli/task_join.py +229 -0
dclab/cli/task_repack.py +98 -0
dclab/cli/task_split.py +154 -0
dclab/cli/task_tdms2rtdc.py +186 -0
dclab/cli/task_verify_dataset.py +75 -0
dclab/definitions/__init__.py +79 -0
dclab/definitions/feat_const.py +202 -0
dclab/definitions/feat_logic.py +183 -0
dclab/definitions/meta_const.py +252 -0
dclab/definitions/meta_logic.py +111 -0
dclab/definitions/meta_parse.py +94 -0
dclab/downsampling.cp313-win_amd64.pyd +0 -0
dclab/downsampling.pyx +230 -0
dclab/external/__init__.py +4 -0
dclab/external/packaging/LICENSE +3 -0
dclab/external/packaging/LICENSE.APACHE +177 -0
dclab/external/packaging/LICENSE.BSD +23 -0
dclab/external/packaging/__init__.py +6 -0
dclab/external/packaging/_structures.py +61 -0
dclab/external/packaging/version.py +505 -0
dclab/external/skimage/LICENSE +28 -0
dclab/external/skimage/__init__.py +2 -0
dclab/external/skimage/_find_contours.py +216 -0
dclab/external/skimage/_find_contours_cy.cp313-win_amd64.pyd +0 -0
dclab/external/skimage/_find_contours_cy.pyx +188 -0
dclab/external/skimage/_pnpoly.cp313-win_amd64.pyd +0 -0
dclab/external/skimage/_pnpoly.pyx +99 -0
dclab/external/skimage/_shared/__init__.py +1 -0
dclab/external/skimage/_shared/geometry.cp313-win_amd64.pyd +0 -0
dclab/external/skimage/_shared/geometry.pxd +6 -0
dclab/external/skimage/_shared/geometry.pyx +55 -0
dclab/external/skimage/measure.py +7 -0
dclab/external/skimage/pnpoly.py +53 -0
dclab/external/statsmodels/LICENSE +35 -0
dclab/external/statsmodels/__init__.py +6 -0
dclab/external/statsmodels/nonparametric/__init__.py +1 -0
dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
dclab/external/statsmodels/nonparametric/kernels.py +36 -0
dclab/features/__init__.py +9 -0
dclab/features/bright.py +81 -0
dclab/features/bright_bc.py +93 -0
dclab/features/bright_perc.py +63 -0
dclab/features/contour.py +161 -0
dclab/features/emodulus/__init__.py +339 -0
dclab/features/emodulus/load.py +252 -0
dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
dclab/features/emodulus/pxcorr.py +135 -0
dclab/features/emodulus/scale_linear.py +247 -0
dclab/features/emodulus/viscosity.py +256 -0
dclab/features/fl_crosstalk.py +95 -0
dclab/features/inert_ratio.py +377 -0
dclab/features/volume.py +242 -0
dclab/http_utils.py +322 -0
dclab/isoelastics/__init__.py +468 -0
dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
dclab/kde_contours.py +222 -0
dclab/kde_methods.py +303 -0
dclab/lme4/__init__.py +5 -0
dclab/lme4/lme4_template.R +94 -0
dclab/lme4/rsetup.py +204 -0
dclab/lme4/wrapr.py +386 -0
dclab/polygon_filter.py +398 -0
dclab/rtdc_dataset/__init__.py +15 -0
dclab/rtdc_dataset/check.py +902 -0
dclab/rtdc_dataset/config.py +533 -0
dclab/rtdc_dataset/copier.py +353 -0
dclab/rtdc_dataset/core.py +1001 -0
dclab/rtdc_dataset/export.py +737 -0
dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
dclab/rtdc_dataset/feat_basin.py +550 -0
dclab/rtdc_dataset/feat_temp.py +102 -0
dclab/rtdc_dataset/filter.py +263 -0
dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
dclab/rtdc_dataset/fmt_dcor/api.py +111 -0
dclab/rtdc_dataset/fmt_dcor/base.py +200 -0
dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
dclab/rtdc_dataset/fmt_dcor/tables.py +42 -0
dclab/rtdc_dataset/fmt_dict.py +103 -0
dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
dclab/rtdc_dataset/fmt_hdf5/events.py +257 -0
dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
dclab/rtdc_dataset/fmt_hdf5/tables.py +30 -0
dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
dclab/rtdc_dataset/fmt_http.py +102 -0
dclab/rtdc_dataset/fmt_s3.py +320 -0
dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
dclab/rtdc_dataset/load.py +72 -0
dclab/rtdc_dataset/writer.py +985 -0
dclab/statistics.py +203 -0
dclab/util.py +156 -0
dclab/warn.py +15 -0
dclab-0.62.11.dist-info/LICENSE +343 -0
dclab-0.62.11.dist-info/METADATA +146 -0
dclab-0.62.11.dist-info/RECORD +137 -0
dclab-0.62.11.dist-info/WHEEL +5 -0
dclab-0.62.11.dist-info/entry_points.txt +8 -0
dclab-0.62.11.dist-info/top_level.txt +1 -0

dclab/statistics.py ADDED Viewed

@@ -0,0 +1,203 @@
+"""Statistics computation for RT-DC dataset instances"""
+import numpy as np
+import traceback as tb
+import warnings
+from . import definitions as dfn
class BadMethodWarning(UserWarning):
    """Issued when a registered statistical method fails to compute"""
class Statistics(object):
    """Callable wrapper around a single statistical method

    Each instance registers itself under its `name` in the class-level
    dictionary `Statistics.available_methods`.
    """
    # Registry of all instances created so far, keyed by `name`
    available_methods = {}

    def __init__(self, name, method, req_feature=False):
        """A helper class for computing statistics

        All statistical methods are registered in the dictionary
        `Statistics.available_methods`.

        Parameters
        ----------
        name: str
            Key under which this instance is registered; an existing
            entry with the same name is replaced.
        method: callable
            Function that computes the statistic. It receives the
            filtered feature data if `req_feature` is True and the
            dataset itself otherwise.
        req_feature: bool
            Whether the method operates on a feature of the dataset
            (True) or on the dataset as a whole (False).
        """
        self.method = method
        self.name = name
        self.req_feature = req_feature
        Statistics.available_methods[name] = self

    def __call__(self, **kwargs):
        """Compute the statistic

        Parameters
        ----------
        ds: dclab.rtdc_dataset.RTDCBase
            The dataset (mandatory keyword argument)
        feature: str
            The feature name (mandatory keyword argument if the
            instance was created with `req_feature=True`)

        Returns
        -------
        result:
            The return value of `method`, or `np.nan` if there is no
            data (length zero) or if `method` raised an exception. In
            the latter case a `BadMethodWarning` containing the
            traceback is issued.

        Raises
        ------
        ValueError
            If a mandatory keyword argument is missing.
        """
        data = self._get_data(kwargs)
        if len(data) == 0:
            return np.nan
        try:
            return self.method(data)
        except Exception:
            # Only regular errors are converted to a warning.
            # `KeyboardInterrupt` and `SystemExit` must propagate so
            # that the user can still abort a long-running computation.
            exc = tb.format_exc().replace("\n", "\n    | ")
            warnings.warn("Failed to compute {} for {}: {}".format(
                          self.name, kwargs["ds"].title, exc),
                          BadMethodWarning)
            return np.nan

    def _get_data(self, kwargs):
        """Convenience wrapper to get statistics data

        Returns the filtered feature data if the method requires a
        feature and the dataset `kwargs["ds"]` otherwise.
        """
        if "ds" not in kwargs:
            raise ValueError("Keyword argument 'ds' missing.")
        ds = kwargs["ds"]
        if not self.req_feature:
            return ds
        if "feature" not in kwargs:
            raise ValueError("Keyword argument 'feature' missing.")
        return self.get_feature(ds, kwargs["feature"])

    def get_feature(self, ds, feat):
        """Return filtered feature data

        The features are filtered according to the user-defined filters,
        using the information in `ds.filter.all`. In addition, all
        `nan` and `inf` values are purged.

        Parameters
        ----------
        ds: dclab.rtdc_dataset.RTDCBase
            The dataset containing the feature
        feat: str
            The name of the feature; must be a scalar feature

        Returns
        -------
        xout: ndarray
            The feature data without filtered events and without
            `nan` and `inf` values
        """
        x = ds[feat]
        if ds.config["filtering"]["enable filters"]:
            x = x[ds.filter.all]
        bad = np.isnan(x) | np.isinf(x)
        return x[~bad]
def flow_rate(ds):
    """Return the flow rate of an RT-DC dataset

    The value is read from the "flow rate" key of the "setup"
    configuration section; `np.nan` is returned if that key
    is not present.
    """
    setup = ds.config["setup"]
    if "flow rate" not in setup:
        return np.nan
    return setup["flow rate"]
def get_statistics(ds, methods=None, features=None):
    """Compute statistics for an RT-DC dataset

    Parameters
    ----------
    ds: dclab.rtdc_dataset.RTDCBase
        The dataset for which to compute the statistics.
    methods: list of str or None
        The methods with which to compute the statistics.
        The list of available methods is given with
        `dclab.statistics.Statistics.available_methods.keys()`
        If set to `None`, statistics for all methods are computed.
    features: list of str
        Feature name identifiers are defined by
        `dclab.definitions.feature_exists`.
        If set to `None`, statistics for all scalar features
        available are computed.

    Returns
    -------
    header: list of str
        The header (feature + method names) of the computed statistics.
    values: list of float
        The computed statistics.
    """
    registry = Statistics.available_methods
    if methods is None:
        names = list(registry.keys())
        # Feature-independent methods come first, then the
        # methods that are evaluated per feature.
        methods = ([nm for nm in names if not registry[nm].req_feature]
                   + [nm for nm in names if registry[nm].req_feature])

    if features is None:
        features = ds.features_scalar
    else:
        features = [ft.lower() for ft in features]

    header = []
    values = []

    # Pass 1: methods that act on the dataset as a whole
    for name in methods:
        stat = registry[name]
        if not stat.req_feature:
            values.append(stat(ds=ds))
            header.append(name)

    # Pass 2: per-feature methods. The feature loop is the outer loop
    # so that all statistics of one feature end up next to each other,
    # which keeps the resulting order human-friendly.
    for feat in features:
        for name in methods:
            stat = registry[name]
            if not stat.req_feature:
                continue
            if feat in ds:
                values.append(stat(ds=ds, feature=feat))
            else:
                # feature not available in this dataset
                values.append(np.nan)
            feat_label = dfn.get_feature_label(feat, rtdc_ds=ds)
            header.append(" ".join([name, feat_label]))

    return header, values
def mode(data):
    """Compute an intelligent value for the mode

    The most common value in experimental data is not very useful if
    there are a lot of digits after the comma. This method approaches
    this issue by rounding to a bin size that is determined by the
    Freedman–Diaconis rule.

    Parameters
    ----------
    data: 1d ndarray
        The data for which the mode should be computed.

    Returns
    -------
    mode: float
        The most frequent binned value, or `np.nan` if the bin size
        is zero (vanishing interquartile range).
    """
    n_events = data.shape[0]
    # interquartile range
    iqr = np.percentile(data, 75)-np.percentile(data, 25)
    # Freedman–Diaconis rule
    bin_size = 2 * iqr / n_events**(1/3)
    if bin_size == 0:
        return np.nan
    # Round each value to a multiple of the bin size and shift the
    # result by half a bin.
    binned = np.round(data/bin_size)*bin_size + bin_size/2
    candidates, counts = np.unique(binned, return_counts=True)
    # `argmax` returns the first (i.e. smallest) candidate on ties
    return candidates[np.argmax(counts)]
# Populate `Statistics.available_methods`
# Statistics that are computed per feature
Statistics("Mean", np.average, req_feature=True)
Statistics("Median", np.median, req_feature=True)
Statistics("Mode", mode, req_feature=True)
Statistics("SD", np.std, req_feature=True)
# Statistics that are computed from the dataset (RTDCBase) itself
Statistics("Events", lambda ds: np.sum(ds.filter.all))
Statistics("%-gated", lambda ds: np.average(ds.filter.all)*100)
Statistics("Flow rate", lambda ds: flow_rate(ds))

dclab/util.py ADDED Viewed

@@ -0,0 +1,156 @@
+"""Utility methods"""
+import functools
+import hashlib
+import numbers
+import pathlib
+import warnings
+import h5py
+import numpy as np
+from .rtdc_dataset.config import Configuration, ConfigurationDict
# Version-dependent constant: `None` for NumPy >= 2.0.0, `False` for
# older versions.
# NOTE(review): presumably intended as the value for the `copy` keyword
# of NumPy array constructors ("copy only if needed") - confirm at the
# call sites.
copy_if_needed = (
    None if np.lib.NumpyVersion(np.__version__) >= "2.0.0" else False)
class file_monitoring_lru_cache:
    """Decorator for caching data extracted from files

    The function that is decorated with `file_monitoring_lru_cache`
    must accept `path` as its first argument. Caching is
    done with an `lru_cache`. In addition to the full path
    and the other arguments to the decorated function, the
    size and the modification time of `path` is used as a
    key for the decorator.
    If the path does not exist, no caching is done.

    Use case: Extract and cache metadata from a file on disk
    that may change.

    The decorated function exposes the `cache_clear` and `cache_info`
    functions of the underlying `lru_cache`.
    """

    def __init__(self, maxsize=100):
        """
        Parameters
        ----------
        maxsize: int
            Maximum number of entries, passed to `functools.lru_cache`
        """
        self.lru_cache = functools.lru_cache(maxsize=maxsize)
        self.cached_wrapper = None

    def __call__(self, func):
        """Return the caching wrapper for `func`"""
        @self.lru_cache
        def cached_wrapper(path, path_stats, *args, **kwargs):
            # `path_stats` is not used by `func`; it is only part of
            # the signature so that it becomes part of the cache key.
            assert path_stats, "We need stat for validating the cache"
            return func(path, *args, **kwargs)

        @functools.wraps(func)
        def wrapper(path, *args, **kwargs):
            full_path = pathlib.Path(path).resolve()
            if full_path.exists():
                path_stat = full_path.stat()
                # `path` and `path_stats` must be passed positionally.
                # If they were passed as keywords, the first item of
                # `*args` would be bound to `path` as well and Python
                # would raise "got multiple values for argument 'path'"
                # for any call with additional positional arguments.
                return cached_wrapper(
                    full_path,
                    (path_stat.st_mtime_ns, path_stat.st_size),
                    *args,
                    **kwargs)
            else:
                # `func` will most-likely raise an exception
                return func(path, *args, **kwargs)

        wrapper.cache_clear = cached_wrapper.cache_clear
        wrapper.cache_info = cached_wrapper.cache_info

        return wrapper
@file_monitoring_lru_cache(maxsize=100)
def hashfile(fname, blocksize=65536, count=0, constructor=hashlib.md5,
             hasher_class=None):
    """Compute the hex-hash (md5 by default) of a file

    Parameters
    ----------
    fname: str or pathlib.Path
        path to the file
    blocksize: int
        block size in bytes read from the file; note that with a
        block size of `0` no data are read at all
    count: int
        number of blocks read from the file
        (set to `0` to hash the entire file)
    constructor: callable
        hash algorithm constructor
    hasher_class: callable
        deprecated, please use `constructor` instead

    Returns
    -------
    hexdigest: str
        hex digest of the data read from the file
    """
    if hasher_class is not None:
        warnings.warn("The `hasher_class` argument is deprecated, please use "
                      "`constructor` instead.")
        constructor = hasher_class

    path = pathlib.Path(fname)

    hasher = constructor()
    with path.open('rb') as fd:
        blocks_read = 0
        while True:
            buf = fd.read(blocksize)
            if not buf:
                # end of file (or `blocksize` is zero)
                break
            hasher.update(buf)
            blocks_read += 1
            # Stop before reading another block that would be discarded
            if count and blocks_read == count:
                break
    return hasher.hexdigest()
def hashobj(obj):
    """Return the md5 hex digest of the bytes representation of `obj`

    The bytes representation is obtained with `obj2bytes`.
    """
    digest = hashlib.md5(obj2bytes(obj))
    return digest.hexdigest()
def obj2bytes(obj):
    """Bytes representation of an object for hashing

    Note that there is no guarantee that the bytes representation
    returned is reproducible across sessions. This is currently the
    case when an :class:`.RTDCBase` instance is passed. There is no
    opinion on whether/how this should be changed.

    Raises
    ------
    ValueError
        If there is no conversion rule for the type of `obj`.
    """
    # The order of the checks below matters; do not rearrange them.
    if isinstance(obj, str):
        return obj.encode("utf-8")
    if isinstance(obj, pathlib.Path):
        return obj2bytes(str(obj))
    if isinstance(obj, (bool, numbers.Number)):
        return str(obj).encode("utf-8")
    if obj is None:
        return b"none"
    if isinstance(obj, np.ndarray):
        return obj.tobytes()
    if isinstance(obj, tuple):
        return obj2bytes(list(obj))
    if isinstance(obj, list):
        # concatenate the representations of all items
        return b"".join(obj2bytes(item) for item in obj)
    if isinstance(obj, dict):
        # sort the items so that the insertion order has no influence
        return obj2bytes(sorted(obj.items()))
    if hasattr(obj, "identifier"):
        # For RTDCBase, this identifier is not reproducible in-between
        # sessions. We might want to change this to something that is
        # reproducible in the future (if the need arises).
        return obj2bytes(obj.identifier)
    if isinstance(obj, h5py.Dataset):
        # path within the HDF5 file and name of the HDF5 file
        o_name = obj.name
        o_filename = obj.file.filename
        parts = [o_name, o_filename]
        if pathlib.Path(o_filename).exists():
            # modification time, followed by the size of the file
            parts.append(pathlib.Path(obj.file.filename).stat().st_mtime)
            parts.append(pathlib.Path(obj.file.filename).stat().st_size)
        return obj2bytes(parts)
    if hasattr(obj, "__array__"):  # must come after h5py.Dataset
        return obj2bytes(obj.__array__())
    if isinstance(obj, Configuration):
        return obj2bytes(obj.tostring())
    if isinstance(obj, ConfigurationDict):
        return obj2bytes(dict(obj))
    raise ValueError("No rule to convert object '{}' to string.".
                     format(obj.__class__))

dclab/warn.py ADDED Viewed

@@ -0,0 +1,15 @@
class PipelineWarning(UserWarning):
    """Super-class for warnings relevant to data analysis

    Some warnings in dclab are important to the user, because
    they suggest that an inappropriate model (e.g. for the
    computation of the Young's modulus) may be used in the
    analysis pipeline. All of these warnings should be
    subclassed from PipelineWarning, so that higher-level
    software such as Shape-Out can identify them and present
    them correctly to the user.
    """