dclab-0.67.0-cp314-cp314-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dclab/__init__.py +41 -0
- dclab/_version.py +34 -0
- dclab/cached.py +97 -0
- dclab/cli/__init__.py +10 -0
- dclab/cli/common.py +237 -0
- dclab/cli/task_compress.py +126 -0
- dclab/cli/task_condense.py +223 -0
- dclab/cli/task_join.py +229 -0
- dclab/cli/task_repack.py +98 -0
- dclab/cli/task_split.py +154 -0
- dclab/cli/task_tdms2rtdc.py +186 -0
- dclab/cli/task_verify_dataset.py +75 -0
- dclab/definitions/__init__.py +79 -0
- dclab/definitions/feat_const.py +202 -0
- dclab/definitions/feat_logic.py +182 -0
- dclab/definitions/meta_const.py +252 -0
- dclab/definitions/meta_logic.py +111 -0
- dclab/definitions/meta_parse.py +94 -0
- dclab/downsampling.cpython-314-darwin.so +0 -0
- dclab/downsampling.pyx +230 -0
- dclab/external/__init__.py +4 -0
- dclab/external/packaging/LICENSE +3 -0
- dclab/external/packaging/LICENSE.APACHE +177 -0
- dclab/external/packaging/LICENSE.BSD +23 -0
- dclab/external/packaging/__init__.py +6 -0
- dclab/external/packaging/_structures.py +61 -0
- dclab/external/packaging/version.py +505 -0
- dclab/external/skimage/LICENSE +28 -0
- dclab/external/skimage/__init__.py +2 -0
- dclab/external/skimage/_find_contours.py +216 -0
- dclab/external/skimage/_find_contours_cy.cpython-314-darwin.so +0 -0
- dclab/external/skimage/_find_contours_cy.pyx +188 -0
- dclab/external/skimage/_pnpoly.cpython-314-darwin.so +0 -0
- dclab/external/skimage/_pnpoly.pyx +99 -0
- dclab/external/skimage/_shared/__init__.py +1 -0
- dclab/external/skimage/_shared/geometry.cpython-314-darwin.so +0 -0
- dclab/external/skimage/_shared/geometry.pxd +6 -0
- dclab/external/skimage/_shared/geometry.pyx +55 -0
- dclab/external/skimage/measure.py +7 -0
- dclab/external/skimage/pnpoly.py +53 -0
- dclab/external/statsmodels/LICENSE +35 -0
- dclab/external/statsmodels/__init__.py +6 -0
- dclab/external/statsmodels/nonparametric/__init__.py +1 -0
- dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
- dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
- dclab/external/statsmodels/nonparametric/kernels.py +36 -0
- dclab/features/__init__.py +9 -0
- dclab/features/bright.py +81 -0
- dclab/features/bright_bc.py +93 -0
- dclab/features/bright_perc.py +63 -0
- dclab/features/contour.py +161 -0
- dclab/features/emodulus/__init__.py +339 -0
- dclab/features/emodulus/load.py +252 -0
- dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
- dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
- dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
- dclab/features/emodulus/pxcorr.py +135 -0
- dclab/features/emodulus/scale_linear.py +247 -0
- dclab/features/emodulus/viscosity.py +260 -0
- dclab/features/fl_crosstalk.py +95 -0
- dclab/features/inert_ratio.py +377 -0
- dclab/features/volume.py +242 -0
- dclab/http_utils.py +322 -0
- dclab/isoelastics/__init__.py +468 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
- dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
- dclab/kde/__init__.py +1 -0
- dclab/kde/base.py +459 -0
- dclab/kde/contours.py +222 -0
- dclab/kde/methods.py +313 -0
- dclab/kde_contours.py +10 -0
- dclab/kde_methods.py +11 -0
- dclab/lme4/__init__.py +5 -0
- dclab/lme4/lme4_template.R +94 -0
- dclab/lme4/rsetup.py +204 -0
- dclab/lme4/wrapr.py +386 -0
- dclab/polygon_filter.py +398 -0
- dclab/rtdc_dataset/__init__.py +15 -0
- dclab/rtdc_dataset/check.py +902 -0
- dclab/rtdc_dataset/config.py +533 -0
- dclab/rtdc_dataset/copier.py +353 -0
- dclab/rtdc_dataset/core.py +896 -0
- dclab/rtdc_dataset/export.py +867 -0
- dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
- dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
- dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
- dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
- dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
- dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
- dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
- dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
- dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
- dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
- dclab/rtdc_dataset/feat_basin.py +762 -0
- dclab/rtdc_dataset/feat_temp.py +102 -0
- dclab/rtdc_dataset/filter.py +263 -0
- dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
- dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
- dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
- dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
- dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
- dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
- dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
- dclab/rtdc_dataset/fmt_dict.py +103 -0
- dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
- dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
- dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
- dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
- dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
- dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
- dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
- dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
- dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
- dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
- dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
- dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
- dclab/rtdc_dataset/fmt_http.py +102 -0
- dclab/rtdc_dataset/fmt_s3.py +354 -0
- dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
- dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
- dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
- dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
- dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
- dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
- dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
- dclab/rtdc_dataset/load.py +77 -0
- dclab/rtdc_dataset/meta_table.py +25 -0
- dclab/rtdc_dataset/writer.py +1019 -0
- dclab/statistics.py +226 -0
- dclab/util.py +176 -0
- dclab/warn.py +15 -0
- dclab-0.67.0.dist-info/METADATA +153 -0
- dclab-0.67.0.dist-info/RECORD +142 -0
- dclab-0.67.0.dist-info/WHEEL +6 -0
- dclab-0.67.0.dist-info/entry_points.txt +8 -0
- dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
- dclab-0.67.0.dist-info/top_level.txt +1 -0
dclab/lme4/wrapr.py
ADDED
@@ -0,0 +1,386 @@
+"""R lme4 wrapper"""
+import logging
+import numbers
+import pathlib
+import tempfile
+
+import importlib_resources
+import numpy as np
+
+from .. import definitions as dfn
+from ..rtdc_dataset.core import RTDCBase
+
+from . import rsetup
+
+
+logger = logging.getLogger(__name__)
+
+
+class Rlme4(object):
+    def __init__(self, model="lmer", feature="deform"):
+        """Perform an R-lme4 analysis with RT-DC data
+
+        Parameters
+        ----------
+        model: str
+            One of:
+
+            - "lmer": linear mixed model using lme4's ``lmer``
+            - "glmer+loglink": generalized linear mixed model using
+              lme4's ``glmer`` with an additional a log-link function
+              via the ``family=Gamma(link='log'))`` keyword.
+        feature: str
+            Dclab feature for which to compute the model
+        """
+        #: modeling method to use (e.g. "lmer")
+        self.model = None
+        #: dclab feature for which to perform the analysis
+        self.feature = None
+        #: list of [RTDCBase, column, repetition, chip_region]
+        self.data = []
+
+        self.set_options(model=model, feature=feature)
+
+        # Make sure that lme4 is available
+        if not rsetup.has_lme4():
+            logger.info("Installing lme4, this may take a while!")
+            rsetup.require_lme4()
+
+    def add_dataset(self, ds, group, repetition):
+        """Add a dataset to the analysis list
+
+        Parameters
+        ----------
+        ds: RTDCBase
+            Dataset
+        group: str
+            The group the measurement belongs to ("control" or
+            "treatment")
+        repetition: int
+            Repetition of the measurement
+
+        Notes
+        -----
+        - For each repetition, there must be a "treatment" (``1``) and a
+          "control" (``0``) group.
+        - If you would like to perform a differential feature analysis,
+          then you need to pass at least a reservoir and a channel
+          dataset (with same parameters for `group` and `repetition`).
+        """
+        assert group in ["treatment", "control"]
+        assert isinstance(ds, RTDCBase)
+        assert isinstance(repetition, numbers.Integral)
+
+        region = ds.config["setup"]["chip region"]
+        # make sure there are no doublets
+        for ii, dd in enumerate(self.data):
+            if dd[1] == group and dd[2] == repetition and dd[3] == region:
+                raise ValueError("A dataset with group '{}', ".format(group)
+                                 + "repetition '{}', and ".format(repetition)
+                                 + "'{}' region has already ".format(region)
+                                 + "been added (index {})!".format(ii))
+
+        self.data.append([ds, group, repetition, region])
+
+    def check_data(self):
+        """Perform sanity checks on ``self.data``"""
+        # Check that we have enough data
+        if len(self.data) < 3:
+            msg = "Linear mixed effects models require repeated " \
+                  + "measurements. Please add more repetitions."
+            raise ValueError(msg)
+
+    def fit(self, model=None, feature=None):
+        """Perform (generalized) linear mixed-effects model fit
+
+        The response variable is modeled using two linear mixed effect
+        models:
+
+        - model: "feature ~ group + (1 + group | repetition)"
+          (random intercept + random slope model)
+        - the null model: "feature ~ (1 + group | repetition)"
+          (without the fixed effect introduced by the "treatment" group).
+
+        Both models are compared in R using "anova" (from the
+        R-package "stats" :cite:`Everitt1992`) which performs a
+        likelihood ratio test to obtain the p-Value for the
+        significance of the fixed effect (treatment).
+
+        If the input datasets contain data from the "reservoir"
+        region, then the analysis is performed for the differential
+        feature.
+
+        Parameters
+        ----------
+        model: str (optional)
+            One of:
+
+            - "lmer": linear mixed model using lme4's ``lmer``
+            - "glmer+loglink": generalized linear mixed model using
+              lme4's ``glmer`` with an additional log-link function
+              via ``family=Gamma(link='log'))`` :cite:`lme4`
+        feature: str (optional)
+            dclab feature for which to compute the model
+
+        Returns
+        -------
+        results: dict
+            Dictionary with the results of the fitting process:
+
+            - "anova p-value": Anova likelihood ratio test (significance)
+            - "feature": name of the feature used for the analysis
+              ``self.feature``
+            - "fixed effects intercept": Mean of ``self.feature`` for all
+              controls; In the case of the "glmer+loglink" model, the intercept
+              is already back transformed from log space.
+            - "fixed effects treatment": The fixed effect size between the mean
+              of the controls and the mean of the treatments relative to
+              "fixed effects intercept"; In the case of the "glmer+loglink"
+              model, the fixed effect is already back transformed from log
+              space.
+            - "fixed effects repetitions": The effects (intercept and
+              treatment) for each repetition. The first axis defines
+              intercept/treatment; the second axis enumerates the repetitions;
+              thus the shape is (2, number of repetitions) and
+              ``np.mean(results["fixed effects repetitions"], axis=1)`` is
+              equivalent to the tuple (``results["fixed effects intercept"]``,
+              ``results["fixed effects treatment"]``) for the "lmer" model.
+              This does not hold for the "glmer+loglink" model, because
+              of the non-linear inverse transform back from log space.
+            - "is differential": Boolean indicating whether or not
+              the analysis was performed for the differential (bootstrapped
+              and subtracted reservoir from channel data) feature
+            - "model": model name used for the analysis ``self.model``
+            - "model converged": boolean indicating whether the model
+              converged
+            - "r model summary": Summary of the model
+            - "r model coefficients": Model coefficient table
+            - "r script": the R script used
+            - "r output": full output of the R script
+        """
+        self.set_options(model=model, feature=feature)
+        self.check_data()
+
+        # Assemble dataset
+        if self.is_differential():
+            # bootstrap and compute differential features using reservoir
+            features, groups, repetitions = self.get_differential_dataset()
+        else:
+            # regular feature analysis
+            features = []
+            groups = []
+            repetitions = []
+            for dd in self.data:
+                features.append(self.get_feature_data(dd[1], dd[2]))
+                groups.append(dd[1])
+                repetitions.append(dd[2])
+
+        # concatenate and populate arrays for R
+        features_c = np.concatenate(features)
+        groups_c = np.zeros(len(features_c), dtype=str)
+        repetitions_c = np.zeros(len(features_c), dtype=int)
+        pos = 0
+        for ii in range(len(features)):
+            size = len(features[ii])
+            groups_c[pos:pos+size] = groups[ii][0]
+            repetitions_c[pos:pos+size] = repetitions[ii]
+            pos += size
+
+        # Run R with the given template script
+        rscript = importlib_resources.read_text("dclab.lme4",
+                                                "lme4_template.R")
+        _, script_path = tempfile.mkstemp(prefix="dclab_lme4_", suffix=".R",
+                                          text=True)
+        script_path = pathlib.Path(script_path)
+        rscript = rscript.replace("<MODEL_NAME>", self.model)
+        rscript = rscript.replace("<FEATURES>", arr2str(features_c))
+        rscript = rscript.replace("<REPETITIONS>", arr2str(repetitions_c))
+        rscript = rscript.replace("<GROUPS>", arr2str(groups_c))
+        script_path.write_text(rscript, encoding="utf-8")
+
+        result = rsetup.run_command((rsetup.get_r_script_path(), script_path))
+
+        ret_dict = self.parse_result(result)
+        ret_dict["is differential"] = self.is_differential()
+        ret_dict["feature"] = self.feature
+        ret_dict["r script"] = rscript
+        ret_dict["r output"] = result
+        assert ret_dict["model"] == self.model
+
+        return ret_dict
+
+    def get_differential_dataset(self):
+        """Return the differential dataset for channel/reservoir data
+
+        The most famous use case is differential deformation. The idea
+        is that you cannot tell what the difference in deformation
+        from channel to reservoir, because you never measure the
+        same object in the reservoir and the channel. You usually just
+        have two distributions. Comparing distributions is possible
+        via bootstrapping. And then, instead of running the lme4
+        analysis with the channel deformation data, it is run with
+        the differential deformation (subtraction of the bootstrapped
+        deformation distributions for channel and reservoir).
+        """
+        features = []
+        groups = []
+        repetitions = []
+        # compute differential features
+        for grp in sorted(set([dd[1] for dd in self.data])):
+            # repetitions per groups
+            grp_rep = sorted(set([dd[2] for dd in self.data if dd[1] == grp]))
+            for rep in grp_rep:
+                feat_cha = self.get_feature_data(grp, rep, region="channel")
+                feat_res = self.get_feature_data(grp, rep, region="reservoir")
+                bs_cha, bs_res = bootstrapped_median_distributions(feat_cha,
+                                                                   feat_res)
+                # differential feature
+                features.append(bs_cha - bs_res)
+                groups.append(grp)
+                repetitions.append(rep)
+        return features, groups, repetitions
+
+    def get_feature_data(self, group, repetition, region="channel"):
+        """Return array containing feature data
+
+        Parameters
+        ----------
+        group: str
+            Measurement group ("control" or "treatment")
+        repetition: int
+            Measurement repetition
+        region: str
+            Either "channel" or "reservoir"
+
+        Returns
+        -------
+        fdata: 1d ndarray
+            Feature data (Nans and Infs removed)
+        """
+        assert group in ["control", "treatment"]
+        assert isinstance(repetition, numbers.Integral)
+        assert region in ["reservoir", "channel"]
+        for dd in self.data:
+            if dd[1] == group and dd[2] == repetition and dd[3] == region:
+                ds = dd[0]
+                break
+        else:
+            raise ValueError("Dataset for group '{}', repetition".format(group)
+                             + " '{}', and region".format(repetition)
+                             + " '{}' not found!".format(region))
+        fdata = ds[self.feature][ds.filter.all]
+        fdata_valid = fdata[~np.logical_or(np.isnan(fdata), np.isinf(fdata))]
+        return fdata_valid
+
+    def is_differential(self):
+        """Return True if the differential feature is computed for analysis
+
+        This effectively just checks the regions of the datasets
+        and returns True if any one of the regions is "reservoir".
+
+        See Also
+        --------
+        get_differential_features: for an explanation
+        """
+        for dd in self.data:
+            if dd[3] == "reservoir":
+                return True
+        else:
+            return False
+
+    def parse_result(self, result):
+        resd = result.split("OUTPUT")
+        ret_dict = {}
+        for item in resd:
+            string = item.split("#*#")[0]
+            key, value = string.split(":", 1)
+            key = key.strip()
+            value = value.strip().replace("\n\n", "\n")
+
+            if key == "fixed effects repetitions":
+                rows = value.split("\n")[1:]
+                reps = []
+                for row in rows:
+                    reps.append([float(vv) for vv in row.split()[1:]])
+                value = np.array(reps).transpose()
+            elif key == "model converged":
+                value = value == "TRUE"
+            elif value == "NA":
+                value = np.nan
+            else:
+                try:
+                    value = float(value)
+                except ValueError:
+                    pass
+
+            ret_dict[key] = value
+        return ret_dict
+
+    def set_options(self, model=None, feature=None):
+        """Set analysis options"""
+        if model is not None:
+            assert model in ["lmer", "glmer+loglink"]
+            self.model = model
+        if feature is not None:
+            assert dfn.scalar_feature_exists(feature)
+            self.feature = feature
+
+
+def arr2str(a):
+    """Convert an array to a string"""
+    if isinstance(a.dtype.type, np.integer):
+        return ",".join(str(dd) for dd in a.tolist())
+    elif a.dtype.type == np.str_:
+        return ",".join(f"'{dd}'" for dd in a.tolist())
+    else:
+        return ",".join(f"{dd:.16g}" for dd in a.tolist())
+
+
+def bootstrapped_median_distributions(a, b, bs_iter=1000, rs=117):
+    """Compute the bootstrapped distributions for two arrays.
+
+    Parameters
+    ----------
+    a, b: 1d ndarray of length N
+        Input data
+    bs_iter: int
+        Number of bootstrapping iterations to perform
+        (output size).
+    rs: int
+        Random state seed for random number generator
+
+    Returns
+    -------
+    median_dist_a, median_dist_b: 1d arrays of length bs_iter
+        Boostrap distribution of medians for ``a`` and ``b``.
+
+    See Also
+    --------
+    `<https://en.wikipedia.org/wiki/Bootstrapping_(statistics)>`_
+
+    Notes
+    -----
+    From a programmatic point of view, it would have been better
+    to implement this method for just one input array (because of
+    redundant code). However, due to historical reasons (testing
+    and comparability to Shape-Out 1), bootstrapping is done
+    interleaved for the two arrays.
+    """
+    # Seed random numbers that are reproducible on different machines
+    prng_object = np.random.RandomState(rs)
+    # Initialize median arrays
+    median_a = np.zeros(bs_iter)
+    median_b = np.zeros(bs_iter)
+    # If this loop is still too slow, we could get rid of it and
+    # do everything with arrays. Depends on whether we will
+    # eventually run into memory problems with array sizes
+    # of y*bs_iter and yR*bs_iter.
+    lena = len(a)
+    lenb = len(b)
+    for q in range(bs_iter):
+        # Compute random indices and draw from a, b
+        draw_a_idx = prng_object.randint(0, lena, lena)
+        median_a[q] = np.median(a[draw_a_idx])
+        draw_b_idx = prng_object.randint(0, lenb, lenb)
+        median_b[q] = np.median(b[draw_b_idx])
+    return median_a, median_b
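
Usage sketch (not shipped in the wheel): how the Rlme4 API added above is typically driven, assuming R with lme4 is available, that Rlme4 is importable from dclab.lme4, and that the .rtdc file names are placeholders; dclab.new_dataset is dclab's standard dataset loader.

import dclab
from dclab.lme4 import Rlme4

# One "control" and one "treatment" dataset per repetition,
# as required by add_dataset() above; paths are placeholders.
rlme4 = Rlme4(model="lmer", feature="deform")
for rep in (1, 2, 3):
    rlme4.add_dataset(dclab.new_dataset(f"control_{rep}.rtdc"),
                      group="control", repetition=rep)
    rlme4.add_dataset(dclab.new_dataset(f"treatment_{rep}.rtdc"),
                      group="treatment", repetition=rep)
res = rlme4.fit()
print(res["anova p-value"], res["model converged"])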
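
The differential-feature path (get_differential_dataset) builds on the bootstrap of medians implemented in bootstrapped_median_distributions. A minimal single-array NumPy sketch of that idea, with the defaults mirrored from above and purely synthetic input data:

import numpy as np

def bootstrap_medians(x, bs_iter=1000, seed=117):
    # Resample x with replacement bs_iter times and record each
    # resample's median (per-array analogue of the function above).
    prng = np.random.RandomState(seed)
    medians = np.zeros(bs_iter)
    for q in range(bs_iter):
        idx = prng.randint(0, len(x), len(x))
        medians[q] = np.median(x[idx])
    return medians

# Synthetic "channel" and "reservoir" deformation values (illustration only)
channel = np.random.default_rng(0).normal(0.05, 0.01, 5000)
reservoir = np.random.default_rng(1).normal(0.02, 0.01, 5000)
differential = bootstrap_medians(channel) - bootstrap_medians(reservoir)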