dclab 0.62.16__cp312-cp312-win_amd64.whl → 0.63.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dclab might be problematic. Click here for more details.

dclab/kde/methods.py ADDED
@@ -0,0 +1,303 @@
1
+ """Kernel Density Estimation methods"""
2
+
3
+ import numpy as np
4
+ from scipy.interpolate import RectBivariateSpline
5
+ from scipy.stats import gaussian_kde, skew
6
+
7
+ from ..cached import Cache
8
+ from ..external.statsmodels.nonparametric.kernel_density import KDEMultivariate
9
+
10
+
11
def bin_num_doane(a):
    """Return the number of bins suggested by Doane's formula

    Parameters
    ----------
    a: ndarray
        Input data; nan and inf entries are excluded before the
        data range is evaluated.

    Returns
    -------
    num: int
        The range of the finite data divided by the Doane bin
        width, rounded to the nearest integer.

    Notes
    -----
    If the bin width is zero or nan, the fallback value 5 is
    returned.

    See Also
    --------
    bin_width_doane: method used to compute the bin width
    """
    finite = a[np.isfinite(a)]
    width = bin_width_doane(a)
    # A zero or undefined width cannot be used as a divisor.
    if width == 0 or np.isnan(width):
        return 5
    span = finite.max() - finite.min()
    return int(np.round(span / width))
31
+
32
+
33
def bin_width_doane(a):
    """Return the contour spacing obtained from Doane's formula

    Parameters
    ----------
    a: ndarray
        Input data; nan and inf entries are ignored.

    Returns
    -------
    acc: float
        Range of the finite data divided by Doane's bin count.
        The result is nan when the bin count evaluates to nan.

    References
    ----------
    - `<https://en.wikipedia.org/wiki/Histogram#Number_of_bins_and_width>`_
    - `<https://stats.stackexchange.com/questions/55134/
      doanes-formula-for-histogram-binning>`_

    Notes
    -----
    Doane's formula was designed for histograms. The function is
    kept for backwards-compatibility; using
    :func:`bin_width_percentile` instead is highly recommended.
    """
    finite = a[np.isfinite(a)]
    count = finite.size
    abs_skewness = np.abs(skew(finite))
    # sigma_g1 term of Doane's formula (depends on the sample size only)
    skew_sigma = np.sqrt(6 * (count - 2) / ((count + 1) * (count + 3)))
    bin_count = 1 + np.log2(count) + np.log2(1 + abs_skewness / skew_sigma)
    return (finite.max() - finite.min()) / bin_count
57
+
58
+
59
def bin_width_percentile(a):
    """Return the contour spacing derived from data percentiles

    The spacing is the distance between the 10th and the 90th
    percentile of the finite input data, divided by 23.

    Parameters
    ----------
    a: ndarray
        Input data; nan and inf entries are ignored.

    Returns
    -------
    acc: float
        The percentile-based spacing.

    Notes
    -----
    The Freedman–Diaconis rule uses the interquartile range and
    normalizes to the third root of len(a). Such things do not
    work very well for RT-DC data, because len(a) is huge. For
    that reason the 10th/90th percentiles are combined with a
    fixed normalization here.
    """
    finite = a[np.isfinite(a)]
    lower = np.percentile(finite, 10)
    upper = np.percentile(finite, 90)
    return (upper - lower) / 23
80
+
81
+
82
def get_bad_vals(x, y):
    """Return a boolean mask marking non-finite coordinate pairs

    Parameters
    ----------
    x, y: ndarray
        Coordinate arrays.

    Returns
    -------
    bad: ndarray of bool
        True wherever `x` or `y` is nan or inf.
    """
    both_finite = np.isfinite(x) & np.isfinite(y)
    return ~both_finite
84
+
85
+
86
def ignore_nan_inf(kde_method):
    """Ignores nans and infs from the input data

    Invalid positions in the resulting density are set to nan.

    Parameters
    ----------
    kde_method: callable
        KDE function that is called as
        ``kde_method(events_x, events_y, xout, yout, *args, **kwargs)``
        and returns one density value per valid output position.

    Returns
    -------
    new_kde_method: callable
        Wrapper around `kde_method`. It strips nan/inf events before
        the call and writes nan to every output position whose
        coordinates are nan/inf. Its docstring is the docstring of
        `kde_method` (if any) followed by a short note.

    Notes
    -----
    The wrapper raises a ValueError if only one of `xout` and `yout`
    is given.
    """
    def new_kde_method(events_x, events_y, xout=None, yout=None,
                       *args, **kwargs):
        # Validate here: below, `xo`/`yo` are both passed as None
        # whenever `xout` is None, so the wrapped method would never
        # notice a lone `yout` (and a lone `xout` would fail with an
        # unrelated TypeError in `get_bad_vals`).
        if (xout is None) != (yout is None):
            raise ValueError("Both `xout` and `yout` must be (un)set.")

        bad_in = get_bad_vals(events_x, events_y)
        if xout is None:
            # The density is evaluated at the input events themselves
            density = np.zeros_like(events_x, dtype=np.float64)
            bad_out = bad_in
            xo = yo = None
        else:
            density = np.zeros_like(xout, dtype=np.float64)
            bad_out = get_bad_vals(xout, yout)
            xo = xout[~bad_out]
            yo = yout[~bad_out]
        # Filter events
        ev_x = events_x[~bad_in]
        ev_y = events_y[~bad_in]
        density[~bad_out] = kde_method(ev_x, ev_y,
                                       xo, yo,
                                       *args, **kwargs)
        density[bad_out] = np.nan
        return density

    doc_add = "\n    Notes\n" +\
              "    -----\n" +\
              "    This is a wrapped version that ignores nan and inf values."
    # `__doc__` is None when docstrings are stripped (``python -OO``) or
    # when the wrapped callable does not define one.
    new_kde_method.__doc__ = (kde_method.__doc__ or "") + doc_add

    return new_kde_method
118
+
119
+
120
@ignore_nan_inf
@Cache
def kde_gauss(events_x, events_y, xout=None, yout=None):
    """Kernel density estimate using a Gaussian kernel

    Parameters
    ----------
    events_x, events_y: ndarray
        Input points of the density estimate; both arrays are
        flattened before they are handed to the estimator.
    xout, yout: ndarray
        Coordinates at which the density is evaluated. When both
        are None, the input points are used.

    Returns
    -------
    density: ndarray, same shape as `xout`
        Density at the points (xout, yout). All values are nan if
        the estimator raises a LinAlgError.

    Raises
    ------
    ValueError
        If only one of `xout` and `yout` is given.

    See Also
    --------
    `scipy.stats.gaussian_kde`
    """
    if (xout is None) != (yout is None):
        raise ValueError("Both `xout` and `yout` must be (un)set.")

    if xout is None:
        # Evaluate at the input events
        xout, yout = events_x, events_y

    out_shape = xout.shape
    try:
        kernel = gaussian_kde([events_x.flatten(), events_y.flatten()])
        values = kernel.evaluate([xout.flatten(), yout.flatten()])
    except np.linalg.LinAlgError:
        # LinAlgError occurs when matrix to solve is singular (issue #117)
        values = np.full(out_shape, np.nan)
    return values.reshape(out_shape)
160
+
161
+
162
@ignore_nan_inf
@Cache
def kde_histogram(events_x, events_y, xout=None, yout=None, bins=None):
    """Density estimate from a spline-interpolated 2D histogram

    Parameters
    ----------
    events_x, events_y: 1D ndarray
        Input points from which the normalized 2D histogram is
        computed.
    xout, yout: ndarray
        Coordinates at which the density is evaluated. When both
        are None, the input points are used.
    bins: tuple (binsx, binsy)
        Number of histogram bins along x and y. If None, the bin
        numbers come from :func:`bin_num_doane`, with a minimum of
        5 per axis.

    Returns
    -------
    density: ndarray, same shape as `xout`
        Interpolated histogram density at the points (xout, yout);
        negative interpolation results are set to zero.

    Raises
    ------
    ValueError
        If only one of `xout` and `yout` is given.

    See Also
    --------
    `numpy.histogram2d`
    `scipy.interpolate.RectBivariateSpline`
    """
    if (xout is None) != (yout is None):
        raise ValueError("Both `xout` and `yout` must be (un)set.")

    if xout is None:
        # Evaluate at the input events
        xout, yout = events_x, events_y

    if bins is None:
        bins = tuple(max(5, bin_num_doane(ev))
                     for ev in (events_x, events_y))

    # Normalized 2D histogram of the events
    counts, edges_x, edges_y = np.histogram2d(x=events_x,
                                              y=events_y,
                                              bins=bins,
                                              density=True)
    # Bin centers: upper edges shifted down by half the first bin width
    centers_x = edges_x[1:] - (edges_x[1] - edges_x[0]) / 2
    centers_y = edges_y[1:] - (edges_y[1] - edges_y[0]) / 2

    spline = RectBivariateSpline(x=centers_x, y=centers_y, z=counts)
    density = spline.ev(xout, yout)
    # The spline may undershoot; a density cannot be negative
    density[density < 0] = 0

    return density.reshape(xout.shape)
215
+
216
+
217
def kde_none(events_x, events_y, xout=None, yout=None):
    """Placeholder method that performs no density estimation

    Parameters
    ----------
    events_x, events_y: ndarray
        Input points; only the shape of `events_x` is used, and
        only when no output coordinates are given.
    xout, yout: ndarray
        Output coordinates; only the shape of `xout` is used.
        When both are None, the shape of `events_x` is used.

    Returns
    -------
    density: ndarray, same shape as `xout`
        An array of ones.

    Raises
    ------
    ValueError
        If only one of `xout` and `yout` is given.

    Notes
    -----
    This is a convenience method that always returns ones in the
    shape that the other methods in this module produce.
    """
    if (xout is None) != (yout is None):
        raise ValueError("Both `xout` and `yout` must be (un)set.")

    reference = events_x if xout is None else xout
    return np.ones(reference.shape)
250
+
251
+
252
@ignore_nan_inf
@Cache
def kde_multivariate(events_x, events_y, xout=None, yout=None, bw=None):
    """Kernel density estimate using `KDEMultivariate`

    Parameters
    ----------
    events_x, events_y: ndarray
        Input points of the density estimate; both arrays are
        flattened before they are handed to the estimator.
    xout, yout: ndarray
        Coordinates at which the density is evaluated. When both
        are None, the input points are used.
    bw: tuple (bwx, bwy) or None
        The bandwidth for kernel density estimation. If None, half
        of the Doane bin width (:func:`bin_width_doane`) of each
        axis is used.

    Returns
    -------
    density: ndarray, same shape as `xout`
        Density at the points (xout, yout).

    Raises
    ------
    ValueError
        If only one of `xout` and `yout` is given.

    See Also
    --------
    `statsmodels.nonparametric.kernel_density.KDEMultivariate`
    """
    if (xout is None) != (yout is None):
        raise ValueError("Both `xout` and `yout` must be (un)set.")

    if xout is None:
        # Evaluate at the input events
        xout, yout = events_x, events_y

    if bw is None:
        # divide by 2 to make it comparable to histogram KDE
        bw = (bin_width_doane(events_x) / 2,
              bin_width_doane(events_y) / 2)

    positions = np.vstack([xout.flatten(), yout.flatten()])
    samples = [events_x.flatten(), events_y.flatten()]
    estimator = KDEMultivariate(data=samples, var_type='cc', bw=bw)

    return estimator.pdf(positions).reshape(xout.shape)
298
+
299
+
300
# Registry that maps the name of each KDE method to its implementation
methods = {
    "gauss": kde_gauss,
    "histogram": kde_histogram,
    "none": kde_none,
    "multivariate": kde_multivariate,
}
dclab/kde_contours.py CHANGED
@@ -1,222 +1,10 @@
1
+ import warnings
1
2
 
2
- import numpy as np
3
+ from .kde.contours import ( # noqa: F401
4
+ find_contours_level, _find_quantile_level, get_quantile_levels
5
+ )
3
6
 
4
- from .external.skimage.measure import find_contours, points_in_poly
5
- import scipy.interpolate as spint
6
7
 
7
- from .kde_methods import get_bad_vals
8
-
9
-
10
- def find_contours_level(density, x, y, level, closed=False):
11
- """Find iso-valued density contours for a given level value
12
-
13
- Parameters
14
- ----------
15
- density: 2d ndarray of shape (M, N)
16
- Kernel density estimate (KDE) for which to compute the contours
17
- x: 2d ndarray of shape (M, N) or 1d ndarray of size M
18
- X-values corresponding to `density`
19
- y: 2d ndarray of shape (M, N) or 1d ndarray of size M
20
- Y-values corresponding to `density`
21
- level: float between 0 and 1
22
- Value along which to find contours in `density` relative
23
- to its maximum
24
- closed: bool
25
- Whether to close contours at the KDE support boundaries
26
-
27
- Returns
28
- -------
29
- contours: list of ndarrays of shape (P, 2)
30
- Contours found for the given level value
31
-
32
- See Also
33
- --------
34
- skimage.measure.find_contours: Contour finding algorithm used
35
- """
36
- if level >= 1 or level <= 0:
37
- raise ValueError("`level` must be in (0,1), got '{}'!".format(level))
38
- # level relative to maximum
39
- level = level * density.max()
40
- # xy coordinates
41
- if len(x.shape) == 2:
42
- assert np.all(x[:, 0] == x[:, 1])
43
- x = x[:, 0]
44
- if len(y.shape) == 2:
45
- assert np.all(y[0, :] == y[1, :])
46
- y = y[0, :]
47
- if closed:
48
- # find closed contours
49
- density = np.pad(density, ((1, 1), (1, 1)), mode="constant")
50
- offset = 1
51
- else:
52
- # leave contours open at kde boundary
53
- offset = 0
54
-
55
- conts_idx = find_contours(density, level)
56
- conts_xy = []
57
-
58
- for cc in conts_idx:
59
- cx = np.interp(x=cc[:, 0]-offset,
60
- xp=range(x.size),
61
- fp=x)
62
- cy = np.interp(x=cc[:, 1]-offset,
63
- xp=range(y.size),
64
- fp=y)
65
- conts_xy.append(np.stack((cx, cy), axis=1))
66
-
67
- return conts_xy
68
-
69
-
70
- def get_quantile_levels(density, x, y, xp, yp, q, normalize=True):
71
- """Compute density levels for given quantiles by interpolation
72
-
73
- For a given 2D density, compute the density levels at which
74
- the resulting contours contain the fraction `1-q` of all
75
- data points. E.g. for a measurement of 1000 events, all
76
- contours at the level corresponding to a quantile of
77
- `q=0.95` (95th percentile) contain 50 events (5%).
78
-
79
- Parameters
80
- ----------
81
- density: 2d ndarray of shape (M, N)
82
- Kernel density estimate for which to compute the contours
83
- x: 2d ndarray of shape (M, N) or 1d ndarray of size M
84
- X-values corresponding to `density`
85
- y: 2d ndarray of shape (M, N) or 1d ndarray of size M
86
- Y-values corresponding to `density`
87
- xp: 1d ndarray of size D
88
- Event x-data from which to compute the quantile
89
- yp: 1d ndarray of size D
90
- Event y-data from which to compute the quantile
91
- q: array_like or float between 0 and 1
92
- Quantile along which to find contours in `density` relative
93
- to its maximum
94
- normalize: bool
95
- Whether output levels should be normalized to the maximum
96
- of `density`
97
-
98
- Returns
99
- -------
100
- level: np.ndarray or float
101
- Contours level(s) corresponding to the given quantile
102
-
103
- Notes
104
- -----
105
- NaN-values events in `xp` and `yp` are ignored.
106
- """
107
- # xy coordinates
108
- if len(x.shape) == 2:
109
- assert np.all(x[:, 0] == x[:, 1])
110
- x = x[:, 0]
111
- if len(y.shape) == 2:
112
- assert np.all(y[0, :] == y[1, :])
113
- y = y[0, :]
114
-
115
- # remove bad events
116
- bad = get_bad_vals(xp, yp)
117
- xp = xp[~bad]
118
- yp = yp[~bad]
119
-
120
- # Normalize interpolation data such that the spacing for
121
- # x and y is about the same during interpolation.
122
- x_norm = x.max()
123
- x = x / x_norm
124
- xp = xp / x_norm
125
-
126
- y_norm = y.max()
127
- y = y / y_norm
128
- yp = yp / y_norm
129
-
130
- # Perform interpolation
131
- dp = spint.interpn((x, y), density,
132
- (xp, yp),
133
- method='linear',
134
- bounds_error=False,
135
- fill_value=0)
136
-
137
- if normalize:
138
- dp /= density.max()
139
-
140
- if not np.isscalar(q):
141
- q = np.array(q)
142
- plev = np.nanpercentile(dp, q=q*100)
143
- return plev
144
-
145
-
146
- def _find_quantile_level(density, x, y, xp, yp, quantile, acc=.01,
147
- ret_err=False):
148
- """Find density level for a given data quantile by iteration
149
-
150
- Parameters
151
- ----------
152
- density: 2d ndarray of shape (M, N)
153
- Kernel density estimate for which to compute the contours
154
- x: 2d ndarray of shape (M, N) or 1d ndarray of size M
155
- X-values corresponding to `density`
156
- y: 2d ndarray of shape (M, N) or 1d ndarray of size M
157
- Y-values corresponding to `density`
158
- xp: 1d ndarray of size D
159
- Event x-data from which to compute the quantile
160
- yp: 1d ndarray of size D
161
- Event y-data from which to compute the quantile
162
- quantile: float between 0 and 1
163
- Quantile along which to find contours in `density` relative
164
- to its maximum
165
- acc: float
166
- Desired absolute accuracy (stopping criterion) of the
167
- contours
168
- ret_err: bool
169
- If True, also return the absolute error
170
-
171
- Returns
172
- -------
173
- level: float
174
- Contours level corresponding to the given quantile
175
-
176
- Notes
177
- -----
178
- A much more faster method (using interpolation) is implemented in
179
- :func:`get_quantile_levels`.
180
- NaN-values events in `xp` and `yp` are ignored.
181
-
182
- See Also
183
- --------
184
- skimage.measure.find_contours: Contour finding algorithm
185
- """
186
- if quantile >= 1 or quantile <= 0:
187
- raise ValueError("Invalid value for `quantile`: {}".format(quantile))
188
-
189
- # remove bad events
190
- bad = get_bad_vals(xp, yp)
191
- xp = xp[~bad]
192
- yp = yp[~bad]
193
- points = np.concatenate((xp.reshape(-1, 1), yp.reshape(-1, 1)), axis=1)
194
-
195
- # initial guess
196
- level = quantile
197
- # error of current iteration
198
- err = 1
199
- # iteration factor (guarantees convergence)
200
- itfac = 1
201
- # total number of events
202
- nev = xp.size
203
-
204
- while np.abs(err) > acc:
205
- # compute contours
206
- conts = find_contours_level(density, x, y, level, closed=True)
207
- # compute number of points in contour
208
- isin = 0
209
- pi = np.array(points, copy=True)
210
- for cc in conts:
211
- pinc = points_in_poly(points=pi, verts=cc)
212
- isin += np.sum(pinc)
213
- # ignore these points for the other contours
214
- pi = pi[~pinc]
215
- err = quantile - (nev - isin) / nev
216
- level += err * itfac
217
- itfac *= .9
218
-
219
- if ret_err:
220
- return level, err
221
- else:
222
- return level
8
# `stacklevel=2` attributes the warning to the code that imports this
# deprecated module instead of to this module itself, so the default
# warning filters show it to the user who needs to update the import.
warnings.warn("`dclab.kde_contours` is deprecated; please use "
              "the `dclab.kde.contours` instead",
              DeprecationWarning,
              stacklevel=2)