PyPI - dclab - Versions diffs - 0.62.16__cp39-cp39-macosx_11_0_arm64.whl → 0.63.0__cp39-cp39-macosx_11_0_arm64.whl - Mend

dclab 0.62.16__cp39-cp39-macosx_11_0_arm64.whl → 0.63.0__cp39-cp39-macosx_11_0_arm64.whl

This diff shows the changes between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dclab might be problematic. Click here for more details.

Files changed (21)

dclab/__init__.py +23 -5
dclab/_version.py +2 -2
dclab/downsampling.cpython-39-darwin.so +0 -0
dclab/external/skimage/_find_contours_cy.cpython-39-darwin.so +0 -0
dclab/external/skimage/_pnpoly.cpython-39-darwin.so +0 -0
dclab/external/skimage/_shared/geometry.cpython-39-darwin.so +0 -0
dclab/kde/__init__.py +1 -0
dclab/kde/base.py +238 -0
dclab/kde/contours.py +222 -0
dclab/kde/methods.py +303 -0
dclab/kde_contours.py +7 -219
dclab/kde_methods.py +9 -301
dclab/rtdc_dataset/core.py +30 -146
dclab/rtdc_dataset/export.py +129 -37
dclab/util.py +20 -0
{dclab-0.62.16.dist-info → dclab-0.63.0.dist-info}/METADATA +4 -4
{dclab-0.62.16.dist-info → dclab-0.63.0.dist-info}/RECORD +21 -17
{dclab-0.62.16.dist-info → dclab-0.63.0.dist-info}/WHEEL +1 -1
{dclab-0.62.16.dist-info → dclab-0.63.0.dist-info}/licenses/LICENSE +1 -61
{dclab-0.62.16.dist-info → dclab-0.63.0.dist-info}/entry_points.txt +0 -0
{dclab-0.62.16.dist-info → dclab-0.63.0.dist-info}/top_level.txt +0 -0

dclab/kde_methods.py CHANGED Viewed

@@ -1,303 +1,11 @@
-"""Kernel Density Estimation methods"""
+import warnings
-import numpy as np
-from scipy.interpolate import RectBivariateSpline
-from scipy.stats import gaussian_kde, skew
+from .kde.methods import (  # noqa: F401
+    bin_num_doane, bin_width_doane, bin_width_percentile, get_bad_vals,
+    ignore_nan_inf, kde_gauss, kde_histogram, kde_multivariate, kde_none,
+    methods
+)
-from .cached import Cache
-from .external.statsmodels.nonparametric.kernel_density import KDEMultivariate
-def bin_num_doane(a):
-    """Compute number of bins based on Doane's formula
-    Notes
-    -----
-    If the bin width cannot be determined, then a bin
-    number of 5 is returned.
-    See Also
-    --------
-    bin_width_doane: method used to compute the bin width
-    """
-    bad = np.isnan(a) | np.isinf(a)
-    data = a[~bad]
-    acc = bin_width_doane(a)
-    if acc == 0 or np.isnan(acc):
-        num = 5
-    else:
-        num = int(np.round((data.max() - data.min()) / acc))
-    return num
-def bin_width_doane(a):
-    """Compute contour spacing based on Doane's formula
-    References
-    ----------
-    - `<https://en.wikipedia.org/wiki/Histogram#Number_of_bins_and_width>`_
-    - `<https://stats.stackexchange.com/questions/55134/
-      doanes-formula-for-histogram-binning>`_
-    Notes
-    -----
-    Doane's formula is actually designed for histograms. This
-    function is kept here for backwards-compatibility reasons.
-    It is highly recommended to use :func:`bin_width_percentile`
-    instead.
-    """
-    bad = np.isnan(a) | np.isinf(a)
-    data = a[~bad]
-    n = data.size
-    g1 = skew(data)
-    sigma_g1 = np.sqrt(6 * (n - 2) / ((n + 1) * (n + 3)))
-    k = 1 + np.log2(n) + np.log2(1 + np.abs(g1) / sigma_g1)
-    acc = (data.max() - data.min()) / k
-    return acc
-def bin_width_percentile(a):
-    """Compute contour spacing based on data percentiles
-    The 10th and the 90th percentile of the input data are taken.
-    The spacing then computes to the difference between those
-    two percentiles divided by 23.
-    Notes
-    -----
-    The Freedman–Diaconis rule uses the interquartile range and
-    normalizes to the third root of len(a). Such things do not
-    work very well for RT-DC data, because len(a) is huge. Here
-    we use just the top and bottom 10th percentiles with a fixed
-    normalization.
-    """
-    bad = np.isnan(a) | np.isinf(a)
-    data = a[~bad]
-    start = np.percentile(data, 10)
-    end = np.percentile(data, 90)
-    acc = (end - start) / 23
-    return acc
-def get_bad_vals(x, y):
-    return np.isnan(x) | np.isinf(x) | np.isnan(y) | np.isinf(y)
-def ignore_nan_inf(kde_method):
-    """Ignores nans and infs from the input data
-    Invalid positions in the resulting density are set to nan.
-    """
-    def new_kde_method(events_x, events_y, xout=None, yout=None,
-                       *args, **kwargs):
-        bad_in = get_bad_vals(events_x, events_y)
-        if xout is None:
-            density = np.zeros_like(events_x, dtype=np.float64)
-            bad_out = bad_in
-            xo = yo = None
-        else:
-            density = np.zeros_like(xout, dtype=np.float64)
-            bad_out = get_bad_vals(xout, yout)
-            xo = xout[~bad_out]
-            yo = yout[~bad_out]
-        # Filter events
-        ev_x = events_x[~bad_in]
-        ev_y = events_y[~bad_in]
-        density[~bad_out] = kde_method(ev_x, ev_y,
-                                       xo, yo,
-                                       *args, **kwargs)
-        density[bad_out] = np.nan
-        return density
-    doc_add = "\n    Notes\n" +\
-              "    -----\n" +\
-              "    This is a wrapped version that ignores nan and inf values."
-    new_kde_method.__doc__ = kde_method.__doc__ + doc_add
-    return new_kde_method
-@ignore_nan_inf
-@Cache
-def kde_gauss(events_x, events_y, xout=None, yout=None):
-    """ Gaussian Kernel Density Estimation
-    Parameters
-    ----------
-    events_x, events_y: 1D ndarray
-        The input points for kernel density estimation. Input
-        is flattened automatically.
-    xout, yout: ndarray
-        The coordinates at which the KDE should be computed.
-        If set to none, input coordinates are used.
-    Returns
-    -------
-    density: ndarray, same shape as `xout`
-        The KDE for the points in (xout, yout)
-    See Also
-    --------
-    `scipy.stats.gaussian_kde`
-    """
-    valid_combi = ((xout is None and yout is None) or
-                   (xout is not None and yout is not None)
-                   )
-    if not valid_combi:
-        raise ValueError("Both `xout` and `yout` must be (un)set.")
-    if xout is None and yout is None:
-        xout = events_x
-        yout = events_y
-    try:
-        estimator = gaussian_kde([events_x.flatten(), events_y.flatten()])
-        density = estimator.evaluate([xout.flatten(), yout.flatten()])
-    except np.linalg.LinAlgError:
-        # LinAlgError occurs when matrix to solve is singular (issue #117)
-        density = np.zeros(xout.shape)*np.nan
-    return density.reshape(xout.shape)
-@ignore_nan_inf
-@Cache
-def kde_histogram(events_x, events_y, xout=None, yout=None, bins=None):
-    """ Histogram-based Kernel Density Estimation
-    Parameters
-    ----------
-    events_x, events_y: 1D ndarray
-        The input points for kernel density estimation. Input
-        is flattened automatically.
-    xout, yout: ndarray
-        The coordinates at which the KDE should be computed.
-        If set to none, input coordinates are used.
-    bins: tuple (binsx, binsy)
-        The number of bins to use for the histogram.
-    Returns
-    -------
-    density: ndarray, same shape as `xout`
-        The KDE for the points in (xout, yout)
-    See Also
-    --------
-    `numpy.histogram2d`
-    `scipy.interpolate.RectBivariateSpline`
-    """
-    valid_combi = ((xout is None and yout is None) or
-                   (xout is not None and yout is not None)
-                   )
-    if not valid_combi:
-        raise ValueError("Both `xout` and `yout` must be (un)set.")
-    if xout is None and yout is None:
-        xout = events_x
-        yout = events_y
-    if bins is None:
-        bins = (max(5, bin_num_doane(events_x)),
-                max(5, bin_num_doane(events_y)))
-    # Compute the histogram
-    hist2d, xedges, yedges = np.histogram2d(x=events_x,
-                                            y=events_y,
-                                            bins=bins,
-                                            density=True)
-    xip = xedges[1:]-(xedges[1]-xedges[0])/2
-    yip = yedges[1:]-(yedges[1]-yedges[0])/2
-    estimator = RectBivariateSpline(x=xip, y=yip, z=hist2d)
-    density = estimator.ev(xout, yout)
-    density[density < 0] = 0
-    return density.reshape(xout.shape)
-def kde_none(events_x, events_y, xout=None, yout=None):
-    """No Kernel Density Estimation
-    Parameters
-    ----------
-    events_x, events_y: 1D ndarray
-        The input points for kernel density estimation. Input
-        is flattened automatically.
-    xout, yout: ndarray
-        The coordinates at which the KDE should be computed.
-        If set to none, input coordinates are used.
-    Returns
-    -------
-    density: ndarray, same shape as `xout`
-        The KDE for the points in (xout, yout)
-    Notes
-    -----
-    This method is a convenience method that always returns ones in the shape
-    that the other methods in this module produce.
-    """
-    valid_combi = ((xout is None and yout is None) or
-                   (xout is not None and yout is not None)
-                   )
-    if not valid_combi:
-        raise ValueError("Both `xout` and `yout` must be (un)set.")
-    if xout is None and yout is None:
-        xout = events_x
-        _ = events_y
-    return np.ones(xout.shape)
-@ignore_nan_inf
-@Cache
-def kde_multivariate(events_x, events_y, xout=None, yout=None, bw=None):
-    """ Multivariate Kernel Density Estimation
-    Parameters
-    ----------
-    events_x, events_y: 1D ndarray
-        The input points for kernel density estimation. Input
-        is flattened automatically.
-    bw: tuple (bwx, bwy) or None
-        The bandwith for kernel density estimation.
-    xout, yout: ndarray
-        The coordinates at which the KDE should be computed.
-        If set to none, input coordinates are used.
-    Returns
-    -------
-    density: ndarray, same shape as `xout`
-        The KDE for the points in (xout, yout)
-    See Also
-    --------
-    `statsmodels.nonparametric.kernel_density.KDEMultivariate`
-    """
-    valid_combi = ((xout is None and yout is None) or
-                   (xout is not None and yout is not None)
-                   )
-    if not valid_combi:
-        raise ValueError("Both `xout` and `yout` must be (un)set.")
-    if xout is None and yout is None:
-        xout = events_x
-        yout = events_y
-    if bw is None:
-        # divide by 2 to make it comparable to histogram KDE
-        bw = (bin_width_doane(events_x) / 2,
-              bin_width_doane(events_y) / 2)
-    positions = np.vstack([xout.flatten(), yout.flatten()])
-    estimator_ly = KDEMultivariate(data=[events_x.flatten(),
-                                         events_y.flatten()],
-                                   var_type='cc', bw=bw)
-    density = estimator_ly.pdf(positions)
-    return density.reshape(xout.shape)
-methods = {"gauss": kde_gauss,
-           "histogram": kde_histogram,
-           "none": kde_none,
-           "multivariate": kde_multivariate}
+warnings.warn("`dclab.kde_methods` is deprecated; please use "
+              "the `dclab.kde.methods` instead",
+              DeprecationWarning)

dclab/rtdc_dataset/core.py CHANGED Viewed

@@ -4,23 +4,23 @@ import hashlib
 import json
 import os.path
 import pathlib
+import random
 import traceback
 from typing import Literal
 import uuid
-import random
 import warnings
 import numpy as np
 from .. import definitions as dfn
 from .. import downsampling
+from ..kde import KernelDensityEstimator
+from ..kde import methods as kde_methods
 from ..polygon_filter import PolygonFilter
-from .. import kde_methods
 from ..util import hashobj
-from .feat_anc_core import AncillaryFeature, FEATURES_RAPID
 from . import feat_basin
 from .export import Export
+from .feat_anc_core import FEATURES_RAPID, AncillaryFeature
 from .filter import Filter
@@ -28,6 +28,10 @@ class FeatureShouldExistButNotFoundWarning(UserWarning):
     pass
+class LocalBasinForbiddenWarning(UserWarning):
+    pass
 class LogTransformWarning(UserWarning):
     pass
@@ -322,47 +326,6 @@ class RTDCBase(abc.ABC):
                     pass
         return data
-    @staticmethod
-    def _apply_scale(a, scale, feat):
-        """Helper function for transforming an aray to log-scale
-        Parameters
-        ----------
-        a: np.ndarray
-            Input array
-        scale: str
-            If set to "log", take the logarithm of `a`; if set to
-            "linear" return `a` unchanged.
-        feat: str
-            Feature name (required for debugging)
-        Returns
-        -------
-        b: np.ndarray
-            The scaled array
-        Notes
-        -----
-        If the scale is not "linear", then a new array is returned.
-        All warnings are suppressed when computing `np.log(a)`, as
-        `a` may have negative or nan values.
-        """
-        if scale == "linear":
-            b = a
-        elif scale == "log":
-            with warnings.catch_warnings(record=True) as w:
-                warnings.simplefilter("always")
-                b = np.log(a)
-                if len(w):
-                    # Tell the user that the log-transformation issued
-                    # a warning.
-                    warnings.warn("Invalid values encounterd in np.log "
-                                  "while scaling feature '{}'!".format(feat))
-        else:
-            raise ValueError("`scale` must be either 'linear' or 'log', "
-                             + "got '{}'!".format(scale))
-        return b
     @staticmethod
     def get_kde_spacing(a, scale="linear", method=kde_methods.bin_width_doane,
                         method_kw=None, feat="undefined", ret_scaled=False):
@@ -383,16 +346,14 @@ class RTDCBase(abc.ABC):
         ret_scaled: bool
             whether to return the scaled array of `a`
         """
-        if method_kw is None:
-            method_kw = {}
-        # Apply scale (no change for linear scale)
-        asc = RTDCBase._apply_scale(a, scale, feat)
-        # Apply multiplicator
-        acc = method(asc, **method_kw)
-        if ret_scaled:
-            return acc, asc
-        else:
-            return acc
+        return KernelDensityEstimator.get_spacing(
+            a=a,
+            scale=scale,
+            method=method,
+            method_kw=method_kw,
+            feat=feat,
+            ret_scaled=ret_scaled,
+        )
     @property
     def _feature_candidates(self):
@@ -625,8 +586,8 @@ class RTDCBase(abc.ABC):
         y = self[yax][self.filter.all]
         # Apply scale (no change for linear scale)
-        xs = RTDCBase._apply_scale(x, xscale, xax)
-        ys = RTDCBase._apply_scale(y, yscale, yax)
+        xs = KernelDensityEstimator.apply_scale(x, xscale, xax)
+        ys = KernelDensityEstimator.apply_scale(y, yscale, yax)
         _, _, idx = downsampling.downsample_grid(xs, ys,
                                                  samples=downsample,
@@ -673,64 +634,11 @@ class RTDCBase(abc.ABC):
         X, Y, Z : coordinates
             The kernel density Z evaluated on a rectangular grid (X,Y).
         """
-        if kde_kwargs is None:
-            kde_kwargs = {}
-        xax = xax.lower()
-        yax = yax.lower()
-        kde_type = kde_type.lower()
-        if kde_type not in kde_methods.methods:
-            raise ValueError("Not a valid kde type: {}!".format(kde_type))
-        # Get data
-        x = self[xax][self.filter.all]
-        y = self[yax][self.filter.all]
-        xacc_sc, xs = RTDCBase.get_kde_spacing(
-            a=x,
-            feat=xax,
-            scale=xscale,
-            method=kde_methods.bin_width_doane,
-            ret_scaled=True)
-        yacc_sc, ys = RTDCBase.get_kde_spacing(
-            a=y,
-            feat=yax,
-            scale=yscale,
-            method=kde_methods.bin_width_doane,
-            ret_scaled=True)
-        if xacc is None or xacc == 0:
-            xacc = xacc_sc / 5
-        if yacc is None or yacc == 0:
-            yacc = yacc_sc / 5
-        # Ignore infs and nans
-        bad = kde_methods.get_bad_vals(xs, ys)
-        xc = xs[~bad]
-        yc = ys[~bad]
-        xnum = int(np.ceil((xc.max() - xc.min()) / xacc))
-        ynum = int(np.ceil((yc.max() - yc.min()) / yacc))
-        xlin = np.linspace(xc.min(), xc.max(), xnum, endpoint=True)
-        ylin = np.linspace(yc.min(), yc.max(), ynum, endpoint=True)
-        xmesh, ymesh = np.meshgrid(xlin, ylin, indexing="ij")
-        kde_fct = kde_methods.methods[kde_type]
-        if len(x):
-            density = kde_fct(events_x=xs, events_y=ys,
-                              xout=xmesh, yout=ymesh,
-                              **kde_kwargs)
-        else:
-            density = np.array([])
-        # Convert mesh back to linear scale if applicable
-        if xscale == "log":
-            xmesh = np.exp(xmesh)
-        if yscale == "log":
-            ymesh = np.exp(ymesh)
+        kde_instance = KernelDensityEstimator(rtdc_ds=self)
+        xmesh, ymesh, density = kde_instance.get_contour(
+            xax=xax, yax=yax, xacc=xacc, yacc=yacc, kde_type=kde_type,
+            kde_kwargs=kde_kwargs, xscale=xscale, yscale=yscale
+        )
         return xmesh, ymesh, density
@@ -765,36 +673,11 @@ class RTDCBase(abc.ABC):
         density : 1d ndarray
             The kernel density evaluated for the filtered data points.
         """
-        if kde_kwargs is None:
-            kde_kwargs = {}
-        xax = xax.lower()
-        yax = yax.lower()
-        kde_type = kde_type.lower()
-        if kde_type not in kde_methods.methods:
-            raise ValueError("Not a valid kde type: {}!".format(kde_type))
-        # Get data
-        x = self[xax][self.filter.all]
-        y = self[yax][self.filter.all]
-        # Apply scale (no change for linear scale)
-        xs = RTDCBase._apply_scale(x, xscale, xax)
-        ys = RTDCBase._apply_scale(y, yscale, yax)
-        if positions is None:
-            posx = None
-            posy = None
-        else:
-            posx = RTDCBase._apply_scale(positions[0], xscale, xax)
-            posy = RTDCBase._apply_scale(positions[1], yscale, yax)
-        kde_fct = kde_methods.methods[kde_type]
-        if len(x):
-            density = kde_fct(events_x=xs, events_y=ys,
-                              xout=posx, yout=posy,
-                              **kde_kwargs)
-        else:
-            density = np.array([])
+        kde_instance = KernelDensityEstimator(rtdc_ds=self)
+        density = kde_instance.get_scatter(
+            xax=xax, yax=yax, positions=positions, kde_type=kde_type,
+            kde_kwargs=kde_kwargs, xscale=xscale, yscale=yscale
+        )
         return density
@@ -879,7 +762,8 @@ class RTDCBase(abc.ABC):
             elif bdict["type"] == "file":
                 if not self._local_basins_allowed:
                     warnings.warn(f"Basin type 'file' not allowed for format "
-                                  f"'{self.format}'")
+                                  f"'{self.format}'",
+                                  LocalBasinForbiddenWarning)
                     # stop processing this basin
                     continue
                 p_paths = list(bdict["paths"])