dclab 0.67.0__cp314-cp314-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dclab might be problematic. Click here for more details.
- dclab/__init__.py +41 -0
- dclab/_version.py +34 -0
- dclab/cached.py +97 -0
- dclab/cli/__init__.py +10 -0
- dclab/cli/common.py +237 -0
- dclab/cli/task_compress.py +126 -0
- dclab/cli/task_condense.py +223 -0
- dclab/cli/task_join.py +229 -0
- dclab/cli/task_repack.py +98 -0
- dclab/cli/task_split.py +154 -0
- dclab/cli/task_tdms2rtdc.py +186 -0
- dclab/cli/task_verify_dataset.py +75 -0
- dclab/definitions/__init__.py +79 -0
- dclab/definitions/feat_const.py +202 -0
- dclab/definitions/feat_logic.py +182 -0
- dclab/definitions/meta_const.py +252 -0
- dclab/definitions/meta_logic.py +111 -0
- dclab/definitions/meta_parse.py +94 -0
- dclab/downsampling.cpython-314-darwin.so +0 -0
- dclab/downsampling.pyx +230 -0
- dclab/external/__init__.py +4 -0
- dclab/external/packaging/LICENSE +3 -0
- dclab/external/packaging/LICENSE.APACHE +177 -0
- dclab/external/packaging/LICENSE.BSD +23 -0
- dclab/external/packaging/__init__.py +6 -0
- dclab/external/packaging/_structures.py +61 -0
- dclab/external/packaging/version.py +505 -0
- dclab/external/skimage/LICENSE +28 -0
- dclab/external/skimage/__init__.py +2 -0
- dclab/external/skimage/_find_contours.py +216 -0
- dclab/external/skimage/_find_contours_cy.cpython-314-darwin.so +0 -0
- dclab/external/skimage/_find_contours_cy.pyx +188 -0
- dclab/external/skimage/_pnpoly.cpython-314-darwin.so +0 -0
- dclab/external/skimage/_pnpoly.pyx +99 -0
- dclab/external/skimage/_shared/__init__.py +1 -0
- dclab/external/skimage/_shared/geometry.cpython-314-darwin.so +0 -0
- dclab/external/skimage/_shared/geometry.pxd +6 -0
- dclab/external/skimage/_shared/geometry.pyx +55 -0
- dclab/external/skimage/measure.py +7 -0
- dclab/external/skimage/pnpoly.py +53 -0
- dclab/external/statsmodels/LICENSE +35 -0
- dclab/external/statsmodels/__init__.py +6 -0
- dclab/external/statsmodels/nonparametric/__init__.py +1 -0
- dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
- dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
- dclab/external/statsmodels/nonparametric/kernels.py +36 -0
- dclab/features/__init__.py +9 -0
- dclab/features/bright.py +81 -0
- dclab/features/bright_bc.py +93 -0
- dclab/features/bright_perc.py +63 -0
- dclab/features/contour.py +161 -0
- dclab/features/emodulus/__init__.py +339 -0
- dclab/features/emodulus/load.py +252 -0
- dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
- dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
- dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
- dclab/features/emodulus/pxcorr.py +135 -0
- dclab/features/emodulus/scale_linear.py +247 -0
- dclab/features/emodulus/viscosity.py +260 -0
- dclab/features/fl_crosstalk.py +95 -0
- dclab/features/inert_ratio.py +377 -0
- dclab/features/volume.py +242 -0
- dclab/http_utils.py +322 -0
- dclab/isoelastics/__init__.py +468 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
- dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
- dclab/kde/__init__.py +1 -0
- dclab/kde/base.py +459 -0
- dclab/kde/contours.py +222 -0
- dclab/kde/methods.py +313 -0
- dclab/kde_contours.py +10 -0
- dclab/kde_methods.py +11 -0
- dclab/lme4/__init__.py +5 -0
- dclab/lme4/lme4_template.R +94 -0
- dclab/lme4/rsetup.py +204 -0
- dclab/lme4/wrapr.py +386 -0
- dclab/polygon_filter.py +398 -0
- dclab/rtdc_dataset/__init__.py +15 -0
- dclab/rtdc_dataset/check.py +902 -0
- dclab/rtdc_dataset/config.py +533 -0
- dclab/rtdc_dataset/copier.py +353 -0
- dclab/rtdc_dataset/core.py +896 -0
- dclab/rtdc_dataset/export.py +867 -0
- dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
- dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
- dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
- dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
- dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
- dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
- dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
- dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
- dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
- dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
- dclab/rtdc_dataset/feat_basin.py +762 -0
- dclab/rtdc_dataset/feat_temp.py +102 -0
- dclab/rtdc_dataset/filter.py +263 -0
- dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
- dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
- dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
- dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
- dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
- dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
- dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
- dclab/rtdc_dataset/fmt_dict.py +103 -0
- dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
- dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
- dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
- dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
- dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
- dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
- dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
- dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
- dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
- dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
- dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
- dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
- dclab/rtdc_dataset/fmt_http.py +102 -0
- dclab/rtdc_dataset/fmt_s3.py +354 -0
- dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
- dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
- dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
- dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
- dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
- dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
- dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
- dclab/rtdc_dataset/load.py +77 -0
- dclab/rtdc_dataset/meta_table.py +25 -0
- dclab/rtdc_dataset/writer.py +1019 -0
- dclab/statistics.py +226 -0
- dclab/util.py +176 -0
- dclab/warn.py +15 -0
- dclab-0.67.0.dist-info/METADATA +153 -0
- dclab-0.67.0.dist-info/RECORD +142 -0
- dclab-0.67.0.dist-info/WHEEL +6 -0
- dclab-0.67.0.dist-info/entry_points.txt +8 -0
- dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
- dclab-0.67.0.dist-info/top_level.txt +1 -0
dclab/kde/methods.py
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
"""Kernel Density Estimation methods"""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from scipy.interpolate import RectBivariateSpline
|
|
5
|
+
from scipy.stats import gaussian_kde, skew
|
|
6
|
+
|
|
7
|
+
from ..cached import Cache
|
|
8
|
+
from ..external.statsmodels.nonparametric.kernel_density import KDEMultivariate
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def bin_num_doane(a):
    """Return the number of bins suggested by Doane's formula

    Parameters
    ----------
    a: ndarray
        Input data; nan and inf entries are ignored.

    Returns
    -------
    num: int
        The range of the valid data divided by the Doane bin width,
        rounded to the nearest integer.

    Notes
    -----
    If the bin width is zero or nan (i.e. it cannot be determined),
    a bin number of 5 is returned.

    See Also
    --------
    bin_width_doane: method used to compute the bin width
    """
    valid = a[~(np.isnan(a) | np.isinf(a))]
    width = bin_width_doane(a)
    if width == 0 or np.isnan(width):
        # no usable bin width; fall back to a fixed number of bins
        return 5
    span = valid.max() - valid.min()
    return int(np.round(span / width))
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def bin_width_doane(a):
    """Return the contour spacing obtained from Doane's formula

    Parameters
    ----------
    a: ndarray
        Input data; nan and inf entries are ignored.

    Returns
    -------
    acc: float
        The range of the valid data divided by Doane's bin count
        ``1 + log2(n) + log2(1 + |g1| / sigma_g1)``, where `g1` is
        the sample skewness and `n` the number of valid data points.

    References
    ----------
    - `<https://en.wikipedia.org/wiki/Histogram#Number_of_bins_and_width>`_
    - `<https://stats.stackexchange.com/questions/55134/
      doanes-formula-for-histogram-binning>`_

    Notes
    -----
    Doane's formula is actually designed for histograms. This
    function is kept here for backwards-compatibility reasons.
    It is highly recommended to use :func:`bin_width_percentile`
    instead.
    """
    valid = a[~(np.isnan(a) | np.isinf(a))]
    count = valid.size
    skewness = skew(valid)
    # sigma_g1 term of Doane's formula; depends only on the sample size
    skew_sigma = np.sqrt(6 * (count - 2) / ((count + 1) * (count + 3)))
    num_bins = 1 + np.log2(count) + np.log2(1 + np.abs(skewness) / skew_sigma)
    return (valid.max() - valid.min()) / num_bins
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def bin_width_doane_div5(a):
    """Return one fifth of the contour spacing from Doane's formula

    Parameters
    ----------
    a: ndarray
        Input data, passed on to :func:`bin_width_doane`.

    See Also
    --------
    bin_width_doane: method used to compute the bin width
    """
    doane_width = bin_width_doane(a)
    return doane_width / 5
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def bin_width_percentile(a):
    """Return a contour spacing derived from data percentiles

    The spacing is the difference between the 90th and the 10th
    percentile of the valid (non-nan, non-inf) input data, divided
    by 23.

    Parameters
    ----------
    a: ndarray
        Input data; nan and inf entries are ignored.

    Notes
    -----
    The Freedman–Diaconis rule uses the interquartile range and
    normalizes to the third root of len(a). Such things do not
    work very well for RT-DC data, because len(a) is huge. Here
    we use just the top and bottom 10th percentiles with a fixed
    normalization.
    """
    valid = a[np.isfinite(a)]
    lower, upper = np.percentile(valid, [10, 90])
    return (upper - lower) / 23
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def get_bad_vals(x, y):
    """Return a boolean mask that is True where `x` or `y` is nan or inf"""
    x_invalid = np.isnan(x) | np.isinf(x)
    y_invalid = np.isnan(y) | np.isinf(y)
    return x_invalid | y_invalid
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def ignore_nan_inf(kde_method):
    """Ignores nans and infs from the input data

    Invalid positions in the resulting density are set to nan.

    Parameters
    ----------
    kde_method: callable
        KDE method with the signature
        ``kde_method(events_x, events_y, xout, yout, *args, **kwargs)``.

    Returns
    -------
    new_kde_method: callable
        Wrapper that removes nan/inf events before calling `kde_method`,
        evaluates only at valid output positions and fills the remaining
        output positions with nan.
    """
    def new_kde_method(events_x, events_y, xout=None, yout=None,
                       *args, **kwargs):
        # Validate here: `yout` is not forwarded when `xout` is None, so
        # the wrapped method would never see an inconsistent combination,
        # and `get_bad_vals(xout, None)` would fail with a TypeError.
        if (xout is None) != (yout is None):
            raise ValueError("Both `xout` and `yout` must be (un)set.")
        bad_in = get_bad_vals(events_x, events_y)
        if xout is None:
            # The density is evaluated at the (valid) input events
            density = np.zeros_like(events_x, dtype=np.float64)
            bad_out = bad_in
            xo = yo = None
        else:
            density = np.zeros_like(xout, dtype=np.float64)
            bad_out = get_bad_vals(xout, yout)
            xo = xout[~bad_out]
            yo = yout[~bad_out]
        # Filter events
        ev_x = events_x[~bad_in]
        ev_y = events_y[~bad_in]
        density[~bad_out] = kde_method(ev_x, ev_y,
                                       xo, yo,
                                       *args, **kwargs)
        density[bad_out] = np.nan
        return density

    doc_add = "\n Notes\n" +\
        " -----\n" +\
        " This is a wrapped version that ignores nan and inf values."
    # The wrapped callable may not have a docstring
    new_kde_method.__doc__ = (kde_method.__doc__ or "") + doc_add

    return new_kde_method
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@ignore_nan_inf
@Cache
def kde_gauss(events_x, events_y, xout=None, yout=None):
    """Gaussian Kernel Density Estimation

    Parameters
    ----------
    events_x, events_y: 1D ndarray
        The input points for kernel density estimation. Input
        is flattened automatically.
    xout, yout: ndarray
        The coordinates at which the KDE should be computed.
        If set to none, input coordinates are used.

    Returns
    -------
    density: ndarray, same shape as `xout`
        The KDE for the points in (xout, yout). All values are nan
        if the estimation fails with a `numpy.linalg.LinAlgError`.

    Raises
    ------
    ValueError
        If only one of `xout` and `yout` is given.

    See Also
    --------
    `scipy.stats.gaussian_kde`
    """
    if (xout is None) != (yout is None):
        raise ValueError("Both `xout` and `yout` must be (un)set.")

    if xout is None:
        # evaluate the KDE at the input events
        xout, yout = events_x, events_y

    out_shape = xout.shape
    try:
        kde = gaussian_kde([events_x.flatten(), events_y.flatten()])
        result = kde.evaluate([xout.flatten(), yout.flatten()])
    except np.linalg.LinAlgError:
        # LinAlgError occurs when matrix to solve is singular (issue #117)
        result = np.zeros(out_shape) * np.nan
    return result.reshape(out_shape)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
@ignore_nan_inf
@Cache
def kde_histogram(events_x, events_y, xout=None, yout=None, bins=None):
    """Histogram-based Kernel Density Estimation

    A normalized 2D histogram of the events is interpolated with a
    bivariate spline through the bin centers. Negative interpolated
    values are set to zero.

    Parameters
    ----------
    events_x, events_y: 1D ndarray
        The input points for kernel density estimation. Input
        is flattened automatically.
    xout, yout: ndarray
        The coordinates at which the KDE should be computed.
        If set to none, input coordinates are used.
    bins: tuple (binsx, binsy)
        The number of bins to use for the histogram. If None, the
        bin numbers are taken from :func:`bin_num_doane` with a
        minimum of 5 bins per axis.

    Returns
    -------
    density: ndarray, same shape as `xout`
        The KDE for the points in (xout, yout)

    Raises
    ------
    ValueError
        If only one of `xout` and `yout` is given.

    See Also
    --------
    `numpy.histogram2d`
    `scipy.interpolate.RectBivariateSpline`
    """
    if (xout is None) != (yout is None):
        raise ValueError("Both `xout` and `yout` must be (un)set.")

    if xout is None:
        # evaluate the KDE at the input events
        xout, yout = events_x, events_y

    if bins is None:
        bins = (max(5, bin_num_doane(events_x)),
                max(5, bin_num_doane(events_y)))

    # Compute the histogram
    hist2d, xedges, yedges = np.histogram2d(
        x=events_x, y=events_y, bins=bins, density=True)
    # Bin centers: upper bin edges shifted down by half a bin width
    half_dx = (xedges[1] - xedges[0]) / 2
    half_dy = (yedges[1] - yedges[0]) / 2
    x_centers = xedges[1:] - half_dx
    y_centers = yedges[1:] - half_dy

    spline = RectBivariateSpline(x=x_centers, y=y_centers, z=hist2d)
    density = spline.ev(xout, yout)
    # the spline interpolation may undershoot below zero
    density[density < 0] = 0

    return density.reshape(xout.shape)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def kde_none(events_x, events_y, xout=None, yout=None):
    """No Kernel Density Estimation

    Parameters
    ----------
    events_x, events_y: 1D ndarray
        The input points for kernel density estimation. Only the
        shape of `events_x` is used (when `xout` is not given).
    xout, yout: ndarray
        The coordinates at which the KDE should be computed.
        If set to none, input coordinates are used.

    Returns
    -------
    density: ndarray, same shape as `xout`
        An array of ones

    Raises
    ------
    ValueError
        If only one of `xout` and `yout` is given.

    Notes
    -----
    This method is a convenience method that always returns ones in the shape
    that the other methods in this module produce.
    """
    if (xout is None) != (yout is None):
        raise ValueError("Both `xout` and `yout` must be (un)set.")

    # Only the output shape matters, the values are never evaluated
    reference = events_x if xout is None else xout
    return np.ones(reference.shape)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
@ignore_nan_inf
@Cache
def kde_multivariate(events_x, events_y, xout=None, yout=None, bw=None):
    """Multivariate Kernel Density Estimation

    Parameters
    ----------
    events_x, events_y: 1D ndarray
        The input points for kernel density estimation. Input
        is flattened automatically.
    bw: tuple (bwx, bwy) or None
        The bandwidth for kernel density estimation. If None, half
        of the bin width from :func:`bin_width_doane` is used for
        each axis.
    xout, yout: ndarray
        The coordinates at which the KDE should be computed.
        If set to none, input coordinates are used.

    Returns
    -------
    density: ndarray, same shape as `xout`
        The KDE for the points in (xout, yout)

    Raises
    ------
    ValueError
        If only one of `xout` and `yout` is given.

    See Also
    --------
    `statsmodels.nonparametric.kernel_density.KDEMultivariate`
    """
    if (xout is None) != (yout is None):
        raise ValueError("Both `xout` and `yout` must be (un)set.")

    if xout is None:
        # evaluate the KDE at the input events
        xout, yout = events_x, events_y

    if bw is None:
        # divide by 2 to make it comparable to histogram KDE
        bw = (bin_width_doane(events_x) / 2,
              bin_width_doane(events_y) / 2)

    eval_points = np.vstack([xout.flatten(), yout.flatten()])
    event_data = [events_x.flatten(), events_y.flatten()]
    estimator = KDEMultivariate(data=event_data, var_type='cc', bw=bw)

    return estimator.pdf(eval_points).reshape(xout.shape)
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
# Lookup table mapping the name of each KDE method to its implementation
methods = dict(
    gauss=kde_gauss,
    histogram=kde_histogram,
    none=kde_none,
    multivariate=kde_multivariate,
)
|
dclab/kde_contours.py
ADDED
dclab/kde_methods.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
|
|
3
|
+
from .kde.methods import ( # noqa: F401
|
|
4
|
+
bin_num_doane, bin_width_doane, bin_width_percentile, get_bad_vals,
|
|
5
|
+
ignore_nan_inf, kde_gauss, kde_histogram, kde_multivariate, kde_none,
|
|
6
|
+
methods
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
# `stacklevel=2` attributes the warning to the code importing this
# deprecated module instead of to this module itself. Otherwise the
# default warning filters hide it and it does not point at the import
# that needs to be changed.
warnings.warn("`dclab.kde_methods` is deprecated; please use "
              "the `dclab.kde.methods` instead",
              DeprecationWarning,
              stacklevel=2)
|
dclab/lme4/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
"""A wrapper around R with the lme4 package"""
|
|
2
|
+
from . import rsetup, wrapr # noqa: F401
|
|
3
|
+
from .wrapr import Rlme4, bootstrapped_median_distributions # noqa: F401
|
|
4
|
+
from .rsetup import ( # noqa: F401
|
|
5
|
+
set_r_lib_path, get_r_path, get_r_version, require_lme4, set_r_path)
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# Template for a linear mixed-effects model analysis with lme4.
#
# The placeholders <MODEL_NAME>, <FEATURES>, <GROUPS> and <REPETITIONS>
# are replaced by dclab before the script is run. Every result is
# written to stdout as "OUTPUT <key>: <value> #*#".
require(stats);
require(lme4);

model_name <- "<MODEL_NAME>"
cat("OUTPUT model:", model_name, "#*#\n")

func_model <- "feature ~ group + (1 + group | repetition)"
func_nullmodel <- "feature ~ (1 + group | repetition)"

# These are the feature, group, and repetition arrays that are set by dclab
# via templates.
feature <- c(<FEATURES>)
group <- c(<GROUPS>)
repetition <- c(<REPETITIONS>)

data <- data.frame(feature, group, repetition)

if (model_name == "glmer+loglink") {
  Model <- glmer(func_model, data, family=Gamma(link='log'))
  NullModel <- glmer(func_nullmodel, data, family=Gamma(link='log'))
} else if (model_name == "lmer") {
  Model <- lmer(func_model, data)
  NullModel <- lmer(func_nullmodel, data)
} else {
  stop("Invalid model_name:", model_name)
}

# Anova analysis (increase verbosity by making models global)
# Using anova is a very conservative way of determining
# p values.
res_anova <- anova(Model, NullModel)
cat("OUTPUT r anova: ")
res_anova
cat("#*#\n")

pvalue <- res_anova$"Pr(>Chisq)"[2]
cat("OUTPUT anova p-value:", pvalue, "#*#\n")

model_summary <- summary(Model)
cat("OUTPUT r model summary:")
model_summary
cat("#*#\n")

model_coefficients <- coef(Model)
cat("OUTPUT r model coefficients:")
model_coefficients
cat("#*#\n")

fe_reps <- model_coefficients$repetition

effects <- data.frame(coef(model_summary))

fe_icept <- effects$Estimate[1]

fe_treat <- effects$Estimate[2]

if (model_name == "glmer+loglink") {
  # transform back from log
  fe_treat <- exp(fe_icept + fe_treat) - exp(fe_icept)
  fe_icept <- exp(fe_icept)
  fe_reps[, 2] = exp(fe_reps[, 1] + fe_reps[, 2]) - exp(fe_reps[, 1])
  fe_reps[, 1] = exp(fe_reps[, 1])
}

cat("OUTPUT fixed effects intercept:", fe_icept, "#*#\n")
cat("OUTPUT fixed effects treatment:", fe_treat, "#*#\n")
cat("OUTPUT fixed effects repetitions:")
fe_reps
cat("#*#\n")

# convergence

# convergence warnings in lme4
is_warning_generated <- function(m) {
  df <- summary(m)
  # There may be more than one message: `grepl` then returns a vector,
  # which must be reduced with `any` before it is used with `&&`.
  !is.null(df$optinfo$conv$lme4$messages) &&
    any(grepl('failed to converge', df$optinfo$conv$lme4$messages))
}
lme4_not_converged <- is_warning_generated(Model)

# convergence code by the optimizer
lme4l <- model_summary$optinfo$conv$lme4
if (length(lme4l) == 0) {
  # the optimizer probably does not know
  conv_code <- 0
} else if (is.null(lme4l$code)) {
  # NULL means 0
  conv_code <- 0
} else {
  conv_code <- lme4l$code
}

cat("OUTPUT model converged:", (conv_code == 0) && !lme4_not_converged, "#*#\n")
# `lme4l` already is `optinfo$conv$lme4`, so the messages are a direct member
cat("OUTPUT lme4 messages:", lme4l$messages, "#*#\n")
|
dclab/lme4/rsetup.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import pathlib
|
|
4
|
+
import shutil
|
|
5
|
+
import subprocess as sp
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
_has_lme4 = None
|
|
10
|
+
_has_r = None
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CommandFailedError(Exception):
    """Used when `run_command` encounters an error

    Derives from :class:`Exception` (not directly from
    :class:`BaseException`), so that generic ``except Exception``
    handlers catch a failed command.
    """
    pass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class RNotFoundError(Exception):
    """Used when the R executable cannot be located

    Derives from :class:`Exception` (not directly from
    :class:`BaseException`), so that generic ``except Exception``
    handlers catch a missing R installation.
    """
    pass
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_r_path():
    """Return the path of the R executable

    The following locations are tried in this order:

    1. the file given in the `R_EXEC` environment variable
    2. the executable "R" found on the search path (`shutil.which`)
    3. ``bin/R`` or ``bin/x64/R`` (or the respective ``R.exe``) in the
       directory given in the `R_HOME` environment variable

    Returns
    -------
    r_exec: pathlib.Path
        Path to the R executable

    Raises
    ------
    RNotFoundError
        If `R_HOME` is needed but unset, empty, or not a directory, or
        if no R binary could be found in `R_HOME`.
    """
    # Maybe the user set the executable already?
    r_exec = os.environ.get("R_EXEC")
    if r_exec is not None:
        r_exec = pathlib.Path(r_exec)
        if r_exec.is_file():
            return r_exec

    # Try to get the executable using which
    r_exec = shutil.which("R")
    if r_exec is not None:
        return pathlib.Path(r_exec)

    # Try to determine the path to the executable from R_HOME
    r_home = os.environ.get("R_HOME")
    if r_home and not pathlib.Path(r_home).is_dir():
        logger.warning(f"R_HOME Directory does not exist: {r_home}")
        r_home = None

    # An empty `R_HOME` is treated like an unset one. Otherwise,
    # `pathlib.Path("")` would make us search the current directory.
    if not r_home:
        raise RNotFoundError(
            "Cannot find R, please set the `R_HOME` environment variable "
            "or use `set_r_path`.")

    r_home = pathlib.Path(r_home)

    # search for the R executable
    for rr in [
        r_home / "bin" / "R",
        r_home / "bin" / "x64" / "R",
    ]:
        if rr.is_file():
            return rr
        rr_win = rr.with_name("R.exe")
        if rr_win.is_file():
            return rr_win

    raise RNotFoundError(
        f"Could not find R binary in '{r_home}'")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def get_r_script_path():
    """Return the path to the Rscript executable

    The path is that of the R executable from :func:`get_r_path`
    with its file name replaced by "Rscript".
    """
    r_exec = get_r_path()
    return r_exec.with_name("Rscript")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def get_r_version():
    """Return the full R version string

    Returns
    -------
    r_version: str
        The first line of the output of ``R --version`` (the second
        line if the first one starts with "WARNING"), with a leading
        "R version " removed.

    Raises
    ------
    RNotFoundError
        If R cannot be found (raised by `require_r`).
    """
    require_r()
    cmd = (str(get_r_path()), "--version")
    logger.debug(f"Looking for R version with: {' '.join(cmd)}")
    output = run_command(
        cmd,
        env={"R_LIBS_USER": os.environ.get("R_LIBS_USER", "")},
    )
    # `run_command` reads the output in text mode by default, so line
    # endings are "\n" on all platforms. Splitting at `os.linesep`
    # would not split anything on Windows.
    lines = output.splitlines()
    if len(lines) > 1 and lines[0].startswith("WARNING"):
        r_version = lines[1]
    else:
        r_version = lines[0] if lines else ""
    logger.info(f"R version found: {r_version}")
    # get the actual version string
    if r_version.startswith("R version "):
        r_version = r_version.split(" ", 2)[2]
    return r_version.strip()
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def has_lme4():
    """Return True if the lme4 package is installed

    R is queried for the packages ``lme4``, ``statmod``, and ``nloptr``
    via ``system.file``. A positive result is cached in the module-level
    `_has_lme4`; a negative result is not cached.

    Raises
    ------
    RNotFoundError
        If R cannot be found (raised by `require_r`).
    """
    global _has_lme4
    if _has_lme4:
        return True
    require_r()
    for pkg in ["lme4", "statmod", "nloptr"]:
        query = f"system.file(package='{pkg}')"
        output = run_command(
            (str(get_r_path()), "-q", "-e", query),
            env={"R_LIBS_USER": os.environ.get("R_LIBS_USER", "")},
        )
        # The text after "[1]" holds the package path, which is
        # empty (does not contain the name) for missing packages.
        if pkg not in output.split("[1]")[1]:
            return False
    _has_lme4 = True
    return True
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def has_r():
    """Return True if R is available

    A positive result is cached in the module-level `_has_r`;
    a negative result is not cached.
    """
    global _has_r
    if _has_r:
        return True
    try:
        found = get_r_path().is_file()
    except RNotFoundError:
        return False
    if found:
        _has_r = True
    return found
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def require_lme4():
    """Install the lme4 package (if not already installed)

    Besides ``lme4``, this also installs ``nloptr`` and ``statmod``.
    The packages are installed by R with ``install.packages`` from the
    http://cran.rstudio.org mirror; the `R_LIBS_USER` environment
    variable of this process is passed on to R.

    Raises
    ------
    RNotFoundError
        If R cannot be found (raised by `require_r`).
    """
    require_r()
    if has_lme4():
        return
    # NOTE(review): the package repository is accessed via plain HTTP
    install_command = ("install.packages("
                       "c('statmod','nloptr','lme4'),"
                       "repos='http://cran.rstudio.org'"
                       ")"
                       )
    r_env = {"R_LIBS_USER": os.environ.get("R_LIBS_USER", "")}
    run_command(cmd=(get_r_path(), "-e", install_command), env=r_env)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def require_r():
    """Make sure R is installed and can be located

    Raises
    ------
    RNotFoundError
        If `has_r` reports that R is not available.
    """
    if not has_r():
        # The environment variable read by `get_r_path` is `R_HOME`
        raise RNotFoundError("Cannot find R, please set its path with the "
                             "`set_r_path` function or set the `R_HOME` "
                             "environment variable.")
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def run_command(cmd, **kwargs):
    """Run a command via subprocess

    Parameters
    ----------
    cmd: iterable of str or pathlib.Path
        The command and its arguments; every item is converted
        to a string.
    **kwargs:
        Keyword arguments passed to :func:`subprocess.check_output`.
        Defaults are ``text=True`` and ``stderr=subprocess.STDOUT``.
        On Windows, a given `env` is merged into a copy of the
        environment of this process.

    Returns
    -------
    output: str
        The output of the command with surrounding whitespace removed

    Raises
    ------
    CommandFailedError
        If the command returns a non-zero exit code.
    """
    if hasattr(sp, "STARTUPINFO"):
        # On Windows, subprocess calls will pop up a command window by
        # default when run from Pyinstaller with the ``--noconsole``
        # option. Avoid this distraction.
        si = sp.STARTUPINFO()
        si.dwFlags |= sp.STARTF_USESHOWWINDOW
        # Windows doesn't search the path by default. Pass it an
        # environment so it will. Use a copy, so that merging the
        # user-defined `env` below does not modify `os.environ`.
        env = os.environ.copy()
    else:
        si = None
        env = None

    kwargs.setdefault("text", True)
    kwargs.setdefault("stderr", sp.STDOUT)
    if env is not None:
        if "env" in kwargs:
            env.update(kwargs.pop("env"))
        kwargs["env"] = env
        kwargs["startupinfo"] = si

    # Convert paths to strings
    cmd = [str(cc) for cc in cmd]

    try:
        tmp = sp.check_output(cmd, **kwargs)
    except sp.CalledProcessError as e:
        raise CommandFailedError(f"The command '{' '.join(cmd)}' failed with "
                                 f"exit code {e.returncode}: {e.output}"
                                 ) from e

    return tmp.strip()
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def set_r_lib_path(r_lib_path):
    """Add given directory to the R_LIBS_USER environment variable

    Parameters
    ----------
    r_lib_path: str or pathlib.Path
        Directory to append to `R_LIBS_USER`. Surrounding whitespace
        is stripped. Nothing is added if the directory is already
        listed.
    """
    paths = os.environ.get("R_LIBS_USER", "").split(os.pathsep)
    paths = [p for p in paths if p]
    paths.append(str(r_lib_path).strip())
    # Remove duplicates while keeping the order of the entries
    # (a `set` would reorder the paths arbitrarily).
    unique_paths = list(dict.fromkeys(paths))
    os.environ["R_LIBS_USER"] = os.pathsep.join(unique_paths)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def set_r_path(r_path):
    """Set the path of the R executable/binary

    The executable is called with the argument "RHOME"; its output is
    stored in the `R_HOME` environment variable and the resolved
    `r_path` in the `R_EXEC` environment variable.

    Parameters
    ----------
    r_path: str or pathlib.Path
        Path to the R executable

    Raises
    ------
    CommandFailedError
        If running the executable fails (raised by `run_command`).
    """
    # `run_command` reads the output in text mode by default, so line
    # endings are "\n" on all platforms. Splitting at `os.linesep`
    # would not split anything on Windows.
    lines = run_command((str(r_path), "RHOME")).splitlines()
    if len(lines) > 1 and lines[0].startswith("WARNING"):
        res = lines[1]
    else:
        res = lines[0] if lines else ""
    os.environ["R_HOME"] = res.strip()
    os.environ["R_EXEC"] = str(pathlib.Path(r_path).resolve())
|