dclab-0.67.0-cp314-cp314-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dclab might be problematic.
- dclab/__init__.py +41 -0
- dclab/_version.py +34 -0
- dclab/cached.py +97 -0
- dclab/cli/__init__.py +10 -0
- dclab/cli/common.py +237 -0
- dclab/cli/task_compress.py +126 -0
- dclab/cli/task_condense.py +223 -0
- dclab/cli/task_join.py +229 -0
- dclab/cli/task_repack.py +98 -0
- dclab/cli/task_split.py +154 -0
- dclab/cli/task_tdms2rtdc.py +186 -0
- dclab/cli/task_verify_dataset.py +75 -0
- dclab/definitions/__init__.py +79 -0
- dclab/definitions/feat_const.py +202 -0
- dclab/definitions/feat_logic.py +182 -0
- dclab/definitions/meta_const.py +252 -0
- dclab/definitions/meta_logic.py +111 -0
- dclab/definitions/meta_parse.py +94 -0
- dclab/downsampling.cpython-314-darwin.so +0 -0
- dclab/downsampling.pyx +230 -0
- dclab/external/__init__.py +4 -0
- dclab/external/packaging/LICENSE +3 -0
- dclab/external/packaging/LICENSE.APACHE +177 -0
- dclab/external/packaging/LICENSE.BSD +23 -0
- dclab/external/packaging/__init__.py +6 -0
- dclab/external/packaging/_structures.py +61 -0
- dclab/external/packaging/version.py +505 -0
- dclab/external/skimage/LICENSE +28 -0
- dclab/external/skimage/__init__.py +2 -0
- dclab/external/skimage/_find_contours.py +216 -0
- dclab/external/skimage/_find_contours_cy.cpython-314-darwin.so +0 -0
- dclab/external/skimage/_find_contours_cy.pyx +188 -0
- dclab/external/skimage/_pnpoly.cpython-314-darwin.so +0 -0
- dclab/external/skimage/_pnpoly.pyx +99 -0
- dclab/external/skimage/_shared/__init__.py +1 -0
- dclab/external/skimage/_shared/geometry.cpython-314-darwin.so +0 -0
- dclab/external/skimage/_shared/geometry.pxd +6 -0
- dclab/external/skimage/_shared/geometry.pyx +55 -0
- dclab/external/skimage/measure.py +7 -0
- dclab/external/skimage/pnpoly.py +53 -0
- dclab/external/statsmodels/LICENSE +35 -0
- dclab/external/statsmodels/__init__.py +6 -0
- dclab/external/statsmodels/nonparametric/__init__.py +1 -0
- dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
- dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
- dclab/external/statsmodels/nonparametric/kernels.py +36 -0
- dclab/features/__init__.py +9 -0
- dclab/features/bright.py +81 -0
- dclab/features/bright_bc.py +93 -0
- dclab/features/bright_perc.py +63 -0
- dclab/features/contour.py +161 -0
- dclab/features/emodulus/__init__.py +339 -0
- dclab/features/emodulus/load.py +252 -0
- dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
- dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
- dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
- dclab/features/emodulus/pxcorr.py +135 -0
- dclab/features/emodulus/scale_linear.py +247 -0
- dclab/features/emodulus/viscosity.py +260 -0
- dclab/features/fl_crosstalk.py +95 -0
- dclab/features/inert_ratio.py +377 -0
- dclab/features/volume.py +242 -0
- dclab/http_utils.py +322 -0
- dclab/isoelastics/__init__.py +468 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
- dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
- dclab/kde/__init__.py +1 -0
- dclab/kde/base.py +459 -0
- dclab/kde/contours.py +222 -0
- dclab/kde/methods.py +313 -0
- dclab/kde_contours.py +10 -0
- dclab/kde_methods.py +11 -0
- dclab/lme4/__init__.py +5 -0
- dclab/lme4/lme4_template.R +94 -0
- dclab/lme4/rsetup.py +204 -0
- dclab/lme4/wrapr.py +386 -0
- dclab/polygon_filter.py +398 -0
- dclab/rtdc_dataset/__init__.py +15 -0
- dclab/rtdc_dataset/check.py +902 -0
- dclab/rtdc_dataset/config.py +533 -0
- dclab/rtdc_dataset/copier.py +353 -0
- dclab/rtdc_dataset/core.py +896 -0
- dclab/rtdc_dataset/export.py +867 -0
- dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
- dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
- dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
- dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
- dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
- dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
- dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
- dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
- dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
- dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
- dclab/rtdc_dataset/feat_basin.py +762 -0
- dclab/rtdc_dataset/feat_temp.py +102 -0
- dclab/rtdc_dataset/filter.py +263 -0
- dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
- dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
- dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
- dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
- dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
- dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
- dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
- dclab/rtdc_dataset/fmt_dict.py +103 -0
- dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
- dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
- dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
- dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
- dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
- dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
- dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
- dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
- dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
- dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
- dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
- dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
- dclab/rtdc_dataset/fmt_http.py +102 -0
- dclab/rtdc_dataset/fmt_s3.py +354 -0
- dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
- dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
- dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
- dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
- dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
- dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
- dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
- dclab/rtdc_dataset/load.py +77 -0
- dclab/rtdc_dataset/meta_table.py +25 -0
- dclab/rtdc_dataset/writer.py +1019 -0
- dclab/statistics.py +226 -0
- dclab/util.py +176 -0
- dclab/warn.py +15 -0
- dclab-0.67.0.dist-info/METADATA +153 -0
- dclab-0.67.0.dist-info/RECORD +142 -0
- dclab-0.67.0.dist-info/WHEEL +6 -0
- dclab-0.67.0.dist-info/entry_points.txt +8 -0
- dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
- dclab-0.67.0.dist-info/top_level.txt +1 -0

dclab/external/statsmodels/nonparametric/_kernel_base.py
ADDED
@@ -0,0 +1,203 @@
"""
Module containing the base object for multivariate kernel density and
regression, plus some utilities.
"""
import numpy as np

from . import kernels


kernel_func = dict(gaussian=kernels.gaussian)
has_joblib = False


class GenericKDE(object):
    """
    Base class for density estimation and regression KDE classes.
    """

    def _compute_bw(self, bw):
        """
        Computes the bandwidth of the data.

        Parameters
        ----------
        bw: array_like or str
            If array_like: user-specified bandwidth.
            If a string, should be one of:

                - cv_ml: cross validation maximum likelihood
                - normal_reference: normal reference rule of thumb
                - cv_ls: cross validation least squares

        Notes
        -----
        The default value for bw is 'normal_reference'.
        """
        if bw is None:
            bw = 'normal_reference'

        if not isinstance(bw, str):
            self._bw_method = "user-specified"
            res = np.asarray(bw)
        else:
            # The user specified a bandwidth selection method
            self._bw_method = bw
            # Workaround to avoid instance methods in __dict__
            if bw == 'normal_reference':
                bwfunc = self._normal_reference
            elif bw == 'cv_ml':
                bwfunc = self._cv_ml
            else:  # bw == 'cv_ls'
                bwfunc = self._cv_ls
            res = bwfunc()

        return res

    def _set_defaults(self, defaults):
        """Sets the default values for the efficient estimation"""
        self.n_res = defaults.n_res
        self.n_sub = defaults.n_sub
        self.randomize = defaults.randomize
        self.return_median = defaults.return_median
        self.efficient = defaults.efficient
        self.return_only_bw = defaults.return_only_bw
        self.n_jobs = defaults.n_jobs


class EstimatorSettings(object):
    """
    Object to specify settings for density estimation or regression.

    `EstimatorSettings` has several properties related to how bandwidth
    estimation for the `KDEMultivariate`, `KDEMultivariateConditional`,
    `KernelReg` and `CensoredKernelReg` classes behaves.

    Parameters
    ----------
    efficient: bool, optional
        If True, the bandwidth estimation is to be performed
        efficiently -- by taking smaller sub-samples and estimating
        the scaling factor of each subsample. This is useful for large
        samples (nobs >> 300) and/or multiple variables (k_vars > 3).
        If False (default), all data is used at the same time.
    randomize: bool, optional
        If True, the bandwidth estimation is to be performed by
        taking `n_res` random resamples (with replacement) of size `n_sub`
        from the full sample. If set to False (default), the estimation is
        performed by slicing the full sample in sub-samples of size `n_sub`
        so that all samples are used once.
    n_sub: int, optional
        Size of the sub-samples. Default is 50.
    n_res: int, optional
        The number of random re-samples used to estimate the bandwidth.
        Only has an effect if ``randomize == True``. Default value is 25.
    return_median: bool, optional
        If True (default), the estimator uses the median of all scaling
        factors for each sub-sample to estimate the bandwidth of the full
        sample. If False, the estimator uses the mean.
    return_only_bw: bool, optional
        If True, the estimator is to use the bandwidth and not the
        scaling factor. This is *not* theoretically justified.
        Should be used only for experimenting.
    n_jobs : int, optional
        The number of jobs to use for parallel estimation with
        ``joblib.Parallel``. Default is -1, meaning ``n_cores - 1``, with
        ``n_cores`` the number of available CPU cores.
        See the `joblib documentation
        <https://pythonhosted.org/joblib/parallel.html>`_ for more details.

    Examples
    --------
    >>> settings = EstimatorSettings(randomize=True, n_jobs=3)
    >>> k_dens = KDEMultivariate(data, var_type, defaults=settings)

    """

    def __init__(self, efficient=False, randomize=False, n_res=25, n_sub=50,
                 return_median=True, return_only_bw=False, n_jobs=-1):
        self.efficient = efficient
        self.randomize = randomize
        self.n_res = n_res
        self.n_sub = n_sub
        self.return_median = return_median
        self.return_only_bw = return_only_bw  # TODO: remove this?
        self.n_jobs = n_jobs


def _adjust_shape(dat, k_vars):
    """ Returns an array of shape (nobs, k_vars) for use with `gpke`."""
    dat = np.asarray(dat)
    if dat.ndim > 2:
        dat = np.squeeze(dat)
    if dat.ndim == 1 and k_vars > 1:  # one obs many vars
        nobs = 1
    elif dat.ndim == 1 and k_vars == 1:  # one obs one var
        nobs = len(dat)
    else:
        if np.shape(dat)[0] == k_vars and np.shape(dat)[1] != k_vars:
            dat = dat.T

        nobs = np.shape(dat)[0]  # ndim >1 so many obs many vars

    dat = np.reshape(dat, (nobs, k_vars))
    return dat


def gpke(bw, data, data_predict, var_type, ckertype='gaussian',
         okertype='wangryzin', ukertype='aitchisonaitken', tosum=True):
    r"""
    Returns the non-normalized Generalized Product Kernel Estimator

    Parameters
    ----------
    bw: 1-D ndarray
        The user-specified bandwidth parameters.
    data: 1D or 2-D ndarray
        The training data.
    data_predict: 1-D ndarray
        The evaluation points at which the kernel estimation is performed.
    var_type: str, optional
        The variable type (continuous, ordered, unordered).
    ckertype: str, optional
        The kernel used for the continuous variables.
    okertype: str, optional
        The kernel used for the ordered discrete variables.
    ukertype: str, optional
        The kernel used for the unordered discrete variables.
    tosum : bool, optional
        Whether or not to sum the calculated array of densities. Default is
        True.

    Returns
    -------
    dens: array-like
        The generalized product kernel density estimator.

    Notes
    -----
    The formula for the multivariate kernel estimator for the pdf is:

    .. math:: f(x)=\frac{1}{nh_{1}...h_{q}}\sum_{i=1}^
                    {n}K\left(\frac{X_{i}-x}{h}\right)

    where

    .. math:: K\left(\frac{X_{i}-x}{h}\right) =
                k\left( \frac{X_{i1}-x_{1}}{h_{1}}\right)\times
                k\left( \frac{X_{i2}-x_{2}}{h_{2}}\right)\times...\times
                k\left(\frac{X_{iq}-x_{q}}{h_{q}}\right)
    """
    kertypes = dict(c=ckertype, o=okertype, u=ukertype)

    Kval = np.empty(data.shape)
    for ii, vtype in enumerate(var_type):
        func = kernel_func[kertypes[vtype]]
        Kval[:, ii] = func(bw[ii], data[:, ii], data_predict[ii])

    iscontinuous = np.array([c == 'c' for c in var_type])
    dens = Kval.prod(axis=1) / np.prod(bw[iscontinuous])
    if tosum:
        return dens.sum(axis=0)
    else:
        return dens
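
As an illustration of how the vendored helpers above fit together (this sketch is not part of the package; the data, bandwidths, and evaluation point are made up), an unnormalized product-kernel sum from `gpke` is turned into a density estimate by dividing by the number of observations, exactly as `KDEMultivariate.pdf` does below:

import numpy as np

rng = np.random.default_rng(42)
data = rng.normal(size=(100, 2))   # 100 observations, 2 continuous variables
bw = np.array([0.3, 0.3])          # user-specified bandwidths
x0 = np.array([0.0, 0.0])          # single evaluation point

# gpke() multiplies per-variable kernel weights, divides by the continuous
# bandwidths and sums over observations; dividing by the number of
# observations yields the kernel density estimate at x0.
density_at_x0 = gpke(bw, data=data, data_predict=x0, var_type='cc') / len(data)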

dclab/external/statsmodels/nonparametric/kernel_density.py
ADDED
@@ -0,0 +1,165 @@
"""
Multivariate Conditional and Unconditional Kernel Density Estimation
with Mixed Data Types.

References
----------
[1] Racine, J., Li, Q. Nonparametric econometrics: theory and practice.
    Princeton University Press. (2007)
[2] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
    and Trends in Econometrics: Vol 3: No 1, pp1-88. (2008)
    https://dx.doi.org/10.1561/0800000009
[3] Racine, J., Li, Q. "Nonparametric Estimation of Distributions
    with Categorical and Continuous Data." Working Paper. (2000)
[4] Racine, J. Li, Q. "Kernel Estimation of Multivariate Conditional
    Distributions Annals of Economics and Finance 5, 211-235 (2004)
[5] Liu, R., Yang, L. "Kernel estimation of multivariate
    cumulative distribution function."
    Journal of Nonparametric Statistics (2008)
[6] Li, R., Ju, G. "Nonparametric Estimation of Multivariate CDF
    with Categorical and Continuous Data." Working Paper
[7] Li, Q., Racine, J. "Cross-validated local linear nonparametric
    regression" Statistica Sinica 14(2004), pp. 485-512
[8] Racine, J.: "Consistent Significance Testing for Nonparametric
    Regression" Journal of Business & Economics Statistics
[9] Racine, J., Hart, J., Li, Q., "Testing the Significance of
    Categorical Predictor Variables in Nonparametric Regression
    Models", 2006, Econometric Reviews 25, 523-544

"""

import numpy as np

from ._kernel_base import GenericKDE, EstimatorSettings, gpke, _adjust_shape


__all__ = ['KDEMultivariate', 'EstimatorSettings']


class KDEMultivariate(GenericKDE):
    """
    Multivariate kernel density estimator.

    This density estimator can handle univariate as well as multivariate
    data, including mixed continuous / ordered discrete / unordered discrete
    data. It also provides cross-validated bandwidth selection methods
    (least squares, maximum likelihood).

    Parameters
    ----------
    data: list of ndarrays or 2-D ndarray
        The training data for the Kernel Density Estimation, used to
        determine the bandwidth(s). If a 2-D array, should be of shape
        (num_observations, num_variables). If a list, each list element is a
        separate observation.
    var_type: str
        The type of the variables:

            - c : continuous
            - u : unordered (discrete)
            - o : ordered (discrete)

        The string should contain a type specifier for each variable, so for
        example ``var_type='ccuo'``.
    bw: array_like or str, optional
        If an array, it is a fixed user-specified bandwidth. If a string,
        should be one of:

            - normal_reference: normal reference rule of thumb (default)
            - cv_ml: cross validation maximum likelihood
            - cv_ls: cross validation least squares

    defaults: EstimatorSettings instance, optional
        The default values for (efficient) bandwidth estimation.

    Attributes
    ----------
    bw: array_like
        The bandwidth parameters.

    See Also
    --------
    KDEMultivariateConditional

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> nobs = 300
    >>> np.random.seed(1234)  # Seed random generator
    >>> c1 = np.random.normal(size=(nobs,1))
    >>> c2 = np.random.normal(2, 1, size=(nobs,1))

    Estimate a bivariate distribution and display the bandwidth found:

    >>> dens_u = sm.nonparametric.KDEMultivariate(data=[c1,c2],
    ...     var_type='cc', bw='normal_reference')
    >>> dens_u.bw
    array([ 0.39967419,  0.38423292])
    """

    def __init__(self, data, var_type, bw=None, defaults=None):
        self.var_type = var_type
        self.k_vars = len(self.var_type)
        self.data = _adjust_shape(data, self.k_vars)
        self.data_type = var_type
        self.nobs, self.k_vars = np.shape(self.data)
        if self.nobs <= self.k_vars:
            raise ValueError("The number of observations must be larger "
                             "than the number of variables.")
        defaults = EstimatorSettings() if defaults is None else defaults
        self._set_defaults(defaults)
        if not self.efficient:
            self.bw = self._compute_bw(bw)
        else:
            self.bw = self._compute_efficient(bw)

    def __repr__(self):
        """Provide something sane to print."""
        rpr = "KDE instance\n"
        rpr += "Number of variables: k_vars = " + str(self.k_vars) + "\n"
        rpr += "Number of samples: nobs = " + str(self.nobs) + "\n"
        rpr += "Variable types: " + self.var_type + "\n"
        rpr += "BW selection method: " + self._bw_method + "\n"
        return rpr

    def pdf(self, data_predict=None):
        r"""
        Evaluate the probability density function.

        Parameters
        ----------
        data_predict: array_like, optional
            Points to evaluate at. If unspecified, the training data is used.

        Returns
        -------
        pdf_est: array_like
            Probability density function evaluated at `data_predict`.

        Notes
        -----
        The probability density is given by the generalized product kernel
        estimator:

        .. math:: K_{h}(X_{i},X_{j}) =
            \prod_{s=1}^{q}h_{s}^{-1}k\left(\frac{X_{is}-X_{js}}{h_{s}}\right)
        """
        if data_predict is None:
            data_predict = self.data
        else:
            data_predict = _adjust_shape(data_predict, self.k_vars)

        pdf_est = []
        for i in range(np.shape(data_predict)[0]):
            pdf_est.append(gpke(self.bw, data=self.data,
                                data_predict=data_predict[i, :],
                                var_type=self.var_type) / self.nobs)

        pdf_est = np.squeeze(pdf_est)
        return pdf_est

    def _get_class_vars_type(self):
        """Helper method to be able to pass needed vars to _compute_subset."""
        class_type = 'KDEMultivariate'
        class_vars = (self.var_type, )
        return class_type, class_vars
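
A minimal usage sketch with made-up data (not part of the package). Note that in this vendored copy only explicit bandwidth arrays appear practical, since the rule-of-thumb and cross-validation helpers referenced by `GenericKDE._compute_bw` are not included in the vendored `_kernel_base.py` shown above:

import numpy as np

rng = np.random.default_rng(0)
samples = np.column_stack([rng.normal(size=200),
                           rng.normal(2.0, 1.0, size=200)])

# Fixed bandwidths for the two continuous variables
kde = KDEMultivariate(data=samples, var_type='cc', bw=[0.35, 0.35])

grid = np.array([[0.0, 2.0],
                 [1.0, 1.0]])
print(kde.pdf(grid))   # density estimates at the two grid points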

dclab/external/statsmodels/nonparametric/kernels.py
ADDED
@@ -0,0 +1,36 @@
"""
Module of kernels that are able to handle continuous as well as categorical
variables (both ordered and unordered).

This is a slight deviation from the current approach in
statsmodels.nonparametric.kernels where each kernel is a class object.

Having kernel functions rather than classes makes extension to a multivariate
kernel density estimation much easier.

NOTE: As it is, this module does not interact with the existing API
"""


import numpy as np


def gaussian(h, Xi, x):
    """
    Gaussian Kernel for continuous variables
    Parameters
    ----------
    h : 1-D ndarray, shape (K,)
        The bandwidths used to estimate the value of the kernel function.
    Xi : 1-D ndarray, shape (K,)
        The value of the training set.
    x : 1-D ndarray, shape (K,)
        The value at which the kernel density is being estimated.

    Returns
    -------
    kernel_value : ndarray, shape (nobs, K)
        The value of the kernel function at each training point for each var.

    """
    return (1. / np.sqrt(2 * np.pi)) * np.exp(-(Xi - x)**2 / (h**2 * 2.))
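
For reference, a tiny sketch (not part of the package; toy values) of what `gaussian` returns:

import numpy as np

h = 0.5
Xi = np.array([-1.0, 0.0, 1.0])   # training values for one variable
weights = gaussian(h, Xi, x=0.0)  # unnormalized kernel weight per training point
# The weight is largest for the training value closest to x.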

dclab/features/__init__.py
ADDED
@@ -0,0 +1,9 @@
"""Basic methods for event feature computation"""
from . import contour  # noqa: F401
from . import bright  # noqa: F401
from . import bright_bc  # noqa: F401
from . import bright_perc  # noqa: F401
from . import emodulus  # noqa: F401
from . import fl_crosstalk  # noqa: F401
from . import inert_ratio  # noqa: F401
from . import volume  # noqa: F401

dclab/features/bright.py
ADDED
@@ -0,0 +1,81 @@
"""
Computation of mean and standard deviation of grayscale values inside the
RT-DC event image mask.
"""
import numpy as np


def get_bright(mask, image, ret_data="avg,sd"):
    """Compute avg and/or std of the event brightness

    The event brightness is defined by the gray-scale values of the
    image data within the event mask area.

    Parameters
    ----------
    mask: ndarray or list of ndarrays of shape (M,N) and dtype bool
        The mask values, True where the event is located in `image`.
    image: ndarray or list of ndarrays of shape (M,N)
        A 2D array that holds the image in form of grayscale values
        of an event.
    ret_data: str
        A comma-separated list of metrics to compute
        - "avg": compute the average
        - "sd": compute the standard deviation
        Selected metrics are returned in alphabetical order.

    Returns
    -------
    bright_avg: float or ndarray of size N
        Average image data within the contour
    bright_std: float or ndarray of size N
        Standard deviation of image data within the contour
    """
    # This method is based on a pull request by Maik Herbig.
    ret_avg = "avg" in ret_data
    ret_std = "sd" in ret_data

    if ret_avg + ret_std == 0:
        raise ValueError("No valid metrices selected!")

    if isinstance(mask, np.ndarray) and len(mask.shape) == 2:
        # We have a single image
        image = [image]
        mask = [mask]
        ret_list = False
    else:
        ret_list = True

    length = min(len(mask), len(image))

    # Results are stored in a separate array initialized with nans
    if ret_avg:
        avg = np.zeros(length, dtype=np.float64) * np.nan
    if ret_std:
        std = np.zeros(length, dtype=np.float64) * np.nan

    for ii in range(length):
        imgi = image[ii]
        mski = mask[ii]
        # Assign results
        if ret_avg:
            avg[ii] = np.mean(imgi[mski])
        if ret_std:
            std[ii] = np.std(imgi[mski])

    results = []
    # Keep alphabetical order
    if ret_avg:
        results.append(avg)
    if ret_std:
        results.append(std)

    if not ret_list:
        # Only return scalars
        results = [r[0] for r in results]

    if ret_avg + ret_std == 1:
        # Only return one column
        return results[0]

    return results
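
An illustrative sketch (toy data, not part of the package) of calling `get_bright` on a single event image:

import numpy as np

image = np.arange(100, dtype=np.uint8).reshape(10, 10)   # toy grayscale image
yy, xx = np.mgrid[0:10, 0:10]
mask = (yy - 5) ** 2 + (xx - 5) ** 2 < 9                 # boolean disk mask

# For a single 2D mask, scalars are returned (avg and sd in that order)
avg, sd = get_bright(mask, image, ret_data="avg,sd")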

dclab/features/bright_bc.py
ADDED
@@ -0,0 +1,93 @@
"""
Computation of mean and standard deviation of grayscale values inside the
RT-DC event image mask with background-correction taken into account.
"""
import numpy as np


def get_bright_bc(mask, image, image_bg, bg_off=None, ret_data="avg,sd"):
    """Compute avg and/or std of the background-corrected event brightness

    The background-corrected event brightness is defined by the
    gray-scale values of the background-corrected image data
    within the event mask area.

    Parameters
    ----------
    mask: ndarray or list of ndarrays of shape (M,N) and dtype bool
        The mask values, True where the event is located in `image`.
    image: ndarray or list of ndarrays of shape (M,N)
        A 2D array that holds the image in form of grayscale values
        of an event.
    image_bg: ndarray or list of ndarrays of shape (M,N)
        A 2D array that holds the background image for the same event.
    bg_off: float or 1D ndarray
        Additional offset value that is added to `image_bg` before
        background correction
    ret_data: str
        A comma-separated list of metrics to compute
        - "avg": compute the average
        - "sd": compute the standard deviation
        Selected metrics are returned in alphabetical order.

    Returns
    -------
    bright_avg: float or ndarray of size N
        Average image data within the contour
    bright_std: float or ndarray of size N
        Standard deviation of image data within the contour
    """
    # This method is based on a pull request by Maik Herbig.
    ret_avg = "avg" in ret_data
    ret_std = "sd" in ret_data

    if ret_avg + ret_std == 0:
        raise ValueError("No valid metrices selected!")

    if isinstance(mask, np.ndarray) and len(mask.shape) == 2:
        # We have a single image
        image_bg = [image_bg]
        image = [image]
        mask = [mask]
        if bg_off is not None:
            bg_off = np.atleast_1d(bg_off)
        ret_list = False
    else:
        ret_list = True

    length = min(len(mask), len(image), len(image_bg))

    # Results are stored in a separate array initialized with nans
    if ret_avg:
        avg = np.zeros(length, dtype=np.float64) * np.nan
    if ret_std:
        std = np.zeros(length, dtype=np.float64) * np.nan

    for ii in range(length):
        # cast to integer before subtraction
        imgi = np.array(image[ii], dtype=int) - image_bg[ii]
        mski = mask[ii]
        # Assign results
        if ret_avg:
            avg[ii] = np.mean(imgi[mski])
        if ret_std:
            std[ii] = np.std(imgi[mski])

    results = []
    # Keep alphabetical order
    if ret_avg:
        if bg_off is not None:
            avg -= bg_off
        results.append(avg)
    if ret_std:
        results.append(std)

    if not ret_list:
        # Only return scalars
        results = [r[0] for r in results]

    if ret_avg + ret_std == 1:
        # Only return one column
        return results[0]

    return results
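
A toy sketch (not part of the package) showing the background correction together with the `bg_off` offset:

import numpy as np

image = np.full((10, 10), 120, dtype=np.uint8)     # event image
image_bg = np.full((10, 10), 100, dtype=np.uint8)  # background image
mask = np.zeros((10, 10), dtype=bool)
mask[3:7, 3:7] = True

avg = get_bright_bc(mask, image, image_bg, bg_off=5.0, ret_data="avg")
# For this uniform example: (120 - 100) - 5 = 15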

dclab/features/bright_perc.py
ADDED
@@ -0,0 +1,63 @@
"""
Computation of the 10th and 90th percentile of grayscale values inside the
RT-DC event image mask with background-correction taken into account.
"""
import numpy as np


def get_bright_perc(mask, image, image_bg, bg_off=None):
    """Compute 10th and 90th percentile of the bg-corrected event brightness

    The background-corrected event brightness is defined by the
    gray-scale values of the background-corrected image data
    within the event mask area.

    Parameters
    ----------
    mask: ndarray or list of ndarrays of shape (M,N) and dtype bool
        The mask values, True where the event is located in `image`.
    image: ndarray or list of ndarrays of shape (M,N)
        A 2D array that holds the image in form of grayscale values
        of an event.
    image_bg: ndarray or list of ndarrays of shape (M,N)
        A 2D array that holds the background image for the same event.
    bg_off: float or 1D ndarray
        Additional offset value that is added to `image_bg` before
        background correction

    Returns
    -------
    bright_perc_10: float or ndarray of size N
        10th percentile of brightness
    bright_perc_90: float or ndarray of size N
        90th percentile of brightness
    """
    if isinstance(mask, np.ndarray) and len(mask.shape) == 2:
        # We have a single image
        image_bg = [image_bg]
        image = [image]
        mask = [mask]
        ret_list = False
    else:
        ret_list = True

    length = min(len(mask), len(image), len(image_bg))

    p10 = np.zeros(length, dtype=np.float64) * np.nan
    p90 = np.zeros(length, dtype=np.float64) * np.nan

    for ii in range(length):
        # cast to integer before subtraction
        imgi = np.array(image[ii], dtype=int) - image_bg[ii]
        mski = mask[ii]
        # Assign results
        p10[ii], p90[ii] = np.percentile(imgi[mski], q=[10, 90])

    if bg_off:
        p10 -= bg_off
        p90 -= bg_off

    if ret_list:
        return p10, p90
    else:
        return p10[0], p90[0]
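
Finally, a toy sketch (not part of the package; made-up image data) for the percentile variant:

import numpy as np

rng = np.random.default_rng(1)
image = rng.integers(90, 160, size=(10, 10)).astype(np.uint8)
image_bg = np.full((10, 10), 100, dtype=np.uint8)
mask = np.zeros((10, 10), dtype=bool)
mask[2:8, 2:8] = True

# Scalars for a single event; with lists of images, arrays are returned.
p10, p90 = get_bright_perc(mask, image, image_bg)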