dclab 0.62.11__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dclab might be problematic. Click here for more details.

Files changed (137) hide show
  1. dclab/__init__.py +23 -0
  2. dclab/_version.py +16 -0
  3. dclab/cached.py +97 -0
  4. dclab/cli/__init__.py +10 -0
  5. dclab/cli/common.py +237 -0
  6. dclab/cli/task_compress.py +126 -0
  7. dclab/cli/task_condense.py +223 -0
  8. dclab/cli/task_join.py +229 -0
  9. dclab/cli/task_repack.py +98 -0
  10. dclab/cli/task_split.py +154 -0
  11. dclab/cli/task_tdms2rtdc.py +186 -0
  12. dclab/cli/task_verify_dataset.py +75 -0
  13. dclab/definitions/__init__.py +79 -0
  14. dclab/definitions/feat_const.py +202 -0
  15. dclab/definitions/feat_logic.py +183 -0
  16. dclab/definitions/meta_const.py +252 -0
  17. dclab/definitions/meta_logic.py +111 -0
  18. dclab/definitions/meta_parse.py +94 -0
  19. dclab/downsampling.cp313-win_amd64.pyd +0 -0
  20. dclab/downsampling.pyx +230 -0
  21. dclab/external/__init__.py +4 -0
  22. dclab/external/packaging/LICENSE +3 -0
  23. dclab/external/packaging/LICENSE.APACHE +177 -0
  24. dclab/external/packaging/LICENSE.BSD +23 -0
  25. dclab/external/packaging/__init__.py +6 -0
  26. dclab/external/packaging/_structures.py +61 -0
  27. dclab/external/packaging/version.py +505 -0
  28. dclab/external/skimage/LICENSE +28 -0
  29. dclab/external/skimage/__init__.py +2 -0
  30. dclab/external/skimage/_find_contours.py +216 -0
  31. dclab/external/skimage/_find_contours_cy.cp313-win_amd64.pyd +0 -0
  32. dclab/external/skimage/_find_contours_cy.pyx +188 -0
  33. dclab/external/skimage/_pnpoly.cp313-win_amd64.pyd +0 -0
  34. dclab/external/skimage/_pnpoly.pyx +99 -0
  35. dclab/external/skimage/_shared/__init__.py +1 -0
  36. dclab/external/skimage/_shared/geometry.cp313-win_amd64.pyd +0 -0
  37. dclab/external/skimage/_shared/geometry.pxd +6 -0
  38. dclab/external/skimage/_shared/geometry.pyx +55 -0
  39. dclab/external/skimage/measure.py +7 -0
  40. dclab/external/skimage/pnpoly.py +53 -0
  41. dclab/external/statsmodels/LICENSE +35 -0
  42. dclab/external/statsmodels/__init__.py +6 -0
  43. dclab/external/statsmodels/nonparametric/__init__.py +1 -0
  44. dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
  45. dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
  46. dclab/external/statsmodels/nonparametric/kernels.py +36 -0
  47. dclab/features/__init__.py +9 -0
  48. dclab/features/bright.py +81 -0
  49. dclab/features/bright_bc.py +93 -0
  50. dclab/features/bright_perc.py +63 -0
  51. dclab/features/contour.py +161 -0
  52. dclab/features/emodulus/__init__.py +339 -0
  53. dclab/features/emodulus/load.py +252 -0
  54. dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
  55. dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
  56. dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
  57. dclab/features/emodulus/pxcorr.py +135 -0
  58. dclab/features/emodulus/scale_linear.py +247 -0
  59. dclab/features/emodulus/viscosity.py +256 -0
  60. dclab/features/fl_crosstalk.py +95 -0
  61. dclab/features/inert_ratio.py +377 -0
  62. dclab/features/volume.py +242 -0
  63. dclab/http_utils.py +322 -0
  64. dclab/isoelastics/__init__.py +468 -0
  65. dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
  66. dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
  67. dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
  68. dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
  69. dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
  70. dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
  71. dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
  72. dclab/kde_contours.py +222 -0
  73. dclab/kde_methods.py +303 -0
  74. dclab/lme4/__init__.py +5 -0
  75. dclab/lme4/lme4_template.R +94 -0
  76. dclab/lme4/rsetup.py +204 -0
  77. dclab/lme4/wrapr.py +386 -0
  78. dclab/polygon_filter.py +398 -0
  79. dclab/rtdc_dataset/__init__.py +15 -0
  80. dclab/rtdc_dataset/check.py +902 -0
  81. dclab/rtdc_dataset/config.py +533 -0
  82. dclab/rtdc_dataset/copier.py +353 -0
  83. dclab/rtdc_dataset/core.py +1001 -0
  84. dclab/rtdc_dataset/export.py +737 -0
  85. dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
  86. dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
  87. dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
  88. dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
  89. dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
  90. dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
  91. dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
  92. dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
  93. dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
  94. dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
  95. dclab/rtdc_dataset/feat_basin.py +550 -0
  96. dclab/rtdc_dataset/feat_temp.py +102 -0
  97. dclab/rtdc_dataset/filter.py +263 -0
  98. dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
  99. dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
  100. dclab/rtdc_dataset/fmt_dcor/api.py +111 -0
  101. dclab/rtdc_dataset/fmt_dcor/base.py +200 -0
  102. dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
  103. dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
  104. dclab/rtdc_dataset/fmt_dcor/tables.py +42 -0
  105. dclab/rtdc_dataset/fmt_dict.py +103 -0
  106. dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
  107. dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
  108. dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
  109. dclab/rtdc_dataset/fmt_hdf5/events.py +257 -0
  110. dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
  111. dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
  112. dclab/rtdc_dataset/fmt_hdf5/tables.py +30 -0
  113. dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
  114. dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
  115. dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
  116. dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
  117. dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
  118. dclab/rtdc_dataset/fmt_http.py +102 -0
  119. dclab/rtdc_dataset/fmt_s3.py +320 -0
  120. dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
  121. dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
  122. dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
  123. dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
  124. dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
  125. dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
  126. dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
  127. dclab/rtdc_dataset/load.py +72 -0
  128. dclab/rtdc_dataset/writer.py +985 -0
  129. dclab/statistics.py +203 -0
  130. dclab/util.py +156 -0
  131. dclab/warn.py +15 -0
  132. dclab-0.62.11.dist-info/LICENSE +343 -0
  133. dclab-0.62.11.dist-info/METADATA +146 -0
  134. dclab-0.62.11.dist-info/RECORD +137 -0
  135. dclab-0.62.11.dist-info/WHEEL +5 -0
  136. dclab-0.62.11.dist-info/entry_points.txt +8 -0
  137. dclab-0.62.11.dist-info/top_level.txt +1 -0
dclab/lme4/rsetup.py ADDED
@@ -0,0 +1,204 @@
1
+ import logging
2
+ import os
3
+ import pathlib
4
+ import shutil
5
+ import subprocess as sp
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+ _has_lme4 = None
10
+ _has_r = None
11
+
12
+
13
class CommandFailedError(Exception):
    """Raised when :func:`run_command` encounters an error

    Derives from ``Exception`` (not ``BaseException``) so that generic
    ``except Exception`` boundaries in calling code can handle it;
    ``BaseException`` is reserved for system-exiting exceptions.
    """
    pass
16
+
17
+
18
class RNotFoundError(Exception):
    """Raised when no R installation could be located

    Derives from ``Exception`` (not ``BaseException``) so that generic
    ``except Exception`` handlers in calling code can catch it.
    """
    pass
20
+
21
+
22
def get_r_path():
    """Locate the R executable and return it as a :class:`pathlib.Path`

    The search order is:

    1. the ``R_EXEC`` environment variable (set e.g. by `set_r_path`),
    2. an ``R`` binary on the current ``PATH``,
    3. the ``bin`` (and Windows ``bin/x64``) directories below the
       ``R_HOME`` environment variable.

    Raises
    ------
    RNotFoundError
        If none of the above yields an existing file.
    """
    # 1. Explicit override via the R_EXEC environment variable
    explicit = os.environ.get("R_EXEC")
    if explicit is not None:
        explicit = pathlib.Path(explicit)
        if explicit.is_file():
            return explicit

    # 2. Whatever `which R` finds on the PATH
    on_path = shutil.which("R")
    if on_path is not None:
        return pathlib.Path(on_path)

    # 3. Derive the binary location from R_HOME
    r_home = os.environ.get("R_HOME")
    if r_home and not pathlib.Path(r_home).is_dir():
        logger.warning(f"R_HOME Directory does not exist: {r_home}")
        r_home = None

    if r_home is None:
        raise RNotFoundError(
            "Cannot find R, please set the `R_HOME` environment variable "
            "or use `set_r_path`.")

    r_home = pathlib.Path(r_home)

    # search for the R executable (POSIX name first, then Windows .exe)
    candidates = [
        r_home / "bin" / "R",
        r_home / "bin" / "x64" / "R",
    ]
    for candidate in candidates:
        if candidate.is_file():
            return candidate
        win_candidate = candidate.with_name("R.exe")
        if win_candidate.is_file():
            return win_candidate
    raise RNotFoundError(
        f"Could not find R binary in '{r_home}'")
63
+
64
+
65
def get_r_script_path():
    """Return the path to the Rscript executable

    Rscript is assumed to live in the same directory as the R
    binary located by :func:`get_r_path`.
    """
    r_binary = get_r_path()
    return r_binary.with_name("Rscript")
68
+
69
+
70
def get_r_version():
    """Return the full R version string

    Runs ``R --version`` and extracts the version from the first
    informative output line (stripping the ``"R version "`` prefix).

    Raises
    ------
    RNotFoundError
        If R cannot be located (via :func:`require_r`).
    """
    require_r()
    cmd = (str(get_r_path()), "--version")
    logger.debug(f"Looking for R version with: {' '.join(cmd)}")
    output = run_command(
        cmd,
        env={"R_LIBS_USER": os.environ.get("R_LIBS_USER", "")},
    )
    # `run_command` uses text mode, so line endings are normalized to
    # "\n" on all platforms; splitting on `os.linesep` (the previous
    # behavior) would fail to split on Windows ("\r\n").
    lines = output.splitlines()
    if lines[0].startswith("WARNING"):
        # e.g. startup warnings; the version is then on the next line
        r_version = lines[1]
    else:
        r_version = lines[0]
    logger.info(f"R version found: {r_version}")
    # get the actual version string
    if r_version.startswith("R version "):
        r_version = r_version.split(" ", 2)[2]
    return r_version.strip()
89
+
90
+
91
def has_lme4():
    """Return True if the lme4 package is installed

    Checks for the R packages "lme4", "statmod", and "nloptr" by
    querying ``system.file`` in an R subprocess. A positive result
    is cached in the module-level ``_has_lme4`` flag.
    """
    global _has_lme4
    if _has_lme4:
        # cached positive result from a previous call
        return True
    require_r()
    user_libs = {"R_LIBS_USER": os.environ.get("R_LIBS_USER", "")}
    for pkg in ["lme4", "statmod", "nloptr"]:
        res = run_command(
            (str(get_r_path()), "-q", "-e", f"system.file(package='{pkg}')"),
            env=user_libs,
        )
        # `system.file` echoes the package installation path after
        # "[1]"; if the package name does not appear there, the
        # package is not installed.
        if not res.split("[1]")[1].count(pkg):
            return False
    _has_lme4 = True
    return True
108
+
109
+
110
def has_r():
    """Return True if R is available

    A positive result is cached in the module-level ``_has_r`` flag.
    """
    global _has_r
    if _has_r:
        return True
    try:
        found = get_r_path().is_file()
    except RNotFoundError:
        found = False
    if found:
        _has_r = True
    return found
122
+
123
+
124
def require_lme4():
    """Install the lme4 package (if not already installed)

    Besides ``lme4``, this also installs ``nloptr`` and ``statmod``.
    The packages are installed to the user data directory
    given in :const:`lib_path` from the http://cran.rstudio.org mirror.
    """
    require_r()
    if has_lme4():
        # everything already in place
        return
    # R expression installing all three packages in one call
    install_command = ("install.packages("
                       "c('statmod','nloptr','lme4'),"
                       "repos='http://cran.rstudio.org'"
                       ")"
                       )
    run_command(cmd=(get_r_path(), "-e", install_command),
                env={"R_LIBS_USER": os.environ.get("R_LIBS_USER", "")},
                )
141
+
142
+
143
def require_r():
    """Make sure R is installed and R_HOME is set

    Raises
    ------
    RNotFoundError
        If no R installation could be located.
    """
    if not has_r():
        # Note: the message previously referred to a non-existent
        # `RHOME` variable; the module actually reads `R_HOME`.
        raise RNotFoundError("Cannot find R, please set its path with the "
                             "`set_r_path` function or set the `R_HOME` "
                             "environment variable.")
149
+
150
+
151
def run_command(cmd, **kwargs):
    """Run a command via subprocess and return its stripped output

    Parameters
    ----------
    cmd: iterable
        Command to run; each element is converted with ``str``
        (so :class:`pathlib.Path` entries are fine).
    kwargs:
        Additional keyword arguments for
        :func:`subprocess.check_output`. A dict passed as ``env`` is
        merged on top of a *copy* of the current process environment.

    Returns
    -------
    output: str
        Stripped combined stdout/stderr of the command.

    Raises
    ------
    CommandFailedError
        If the command exits with a non-zero return code.
    """
    if hasattr(sp, "STARTUPINFO"):
        # On Windows, subprocess calls will pop up a command window by
        # default when run from Pyinstaller with the ``--noconsole``
        # option. Avoid this distraction.
        si = sp.STARTUPINFO()
        si.dwFlags |= sp.STARTF_USESHOWWINDOW
    else:
        si = None

    # Always pass a *copy* of the current environment, merged with any
    # caller-supplied `env`. The previous implementation updated
    # `os.environ` in place on Windows (leaking the caller's `env`
    # into the current process) and on POSIX replaced the entire
    # subprocess environment with just the caller's `env`.
    env = os.environ.copy()
    env.update(kwargs.pop("env", {}))
    kwargs["env"] = env

    kwargs.setdefault("text", True)
    kwargs.setdefault("stderr", sp.STDOUT)
    kwargs["startupinfo"] = si

    # Convert paths to strings
    cmd = [str(cc) for cc in cmd]

    try:
        tmp = sp.check_output(cmd, **kwargs)
    except sp.CalledProcessError as e:
        raise CommandFailedError(
            f"The command '{' '.join(cmd)}' failed with "
            f"exit code {e.returncode}: {e.output}") from e

    return tmp.strip()
184
+
185
+
186
def set_r_lib_path(r_lib_path):
    """Add given directory to the R_LIBS_USER environment variable

    Duplicate entries are removed via a ``set``; as a consequence,
    the order of the entries is not preserved.
    """
    current = os.environ.get("R_LIBS_USER", "")
    entries = [item for item in current.split(os.pathsep) if item]
    entries.append(str(r_lib_path).strip())
    # deduplicate (order not guaranteed) and write back
    os.environ["R_LIBS_USER"] = os.pathsep.join(set(entries))
192
+
193
+
194
def set_r_path(r_path):
    """Set the path of the R executable/binary

    Runs ``R RHOME`` to determine the installation directory, exports
    it as the ``R_HOME`` environment variable, and stores the resolved
    executable path in ``R_EXEC`` (consumed by :func:`get_r_path`).
    """
    tmp = run_command((str(r_path), "RHOME"))

    # `run_command` uses text mode, so line endings are normalized to
    # "\n" on all platforms; splitting on `os.linesep` (the previous
    # behavior) would fail to split on Windows ("\r\n").
    lines = tmp.splitlines()
    if lines[0].startswith("WARNING"):
        # the actual RHOME path follows the warning line; strip it
        # (the original code forgot to strip this branch)
        res = lines[1].strip()
    else:
        res = lines[0].strip()
    os.environ["R_HOME"] = res
    os.environ["R_EXEC"] = str(pathlib.Path(r_path).resolve())
dclab/lme4/wrapr.py ADDED
@@ -0,0 +1,386 @@
1
+ """R lme4 wrapper"""
2
+ import logging
3
+ import numbers
4
+ import pathlib
5
+ import tempfile
6
+
7
+ import importlib_resources
8
+ import numpy as np
9
+
10
+ from .. import definitions as dfn
11
+ from ..rtdc_dataset.core import RTDCBase
12
+
13
+ from . import rsetup
14
+
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class Rlme4(object):
    """Perform (generalized) linear mixed-effects model analyses with R/lme4"""

    def __init__(self, model="lmer", feature="deform"):
        """Perform an R-lme4 analysis with RT-DC data

        Parameters
        ----------
        model: str
            One of:

            - "lmer": linear mixed model using lme4's ``lmer``
            - "glmer+loglink": generalized linear mixed model using
              lme4's ``glmer`` with an additional log-link function
              via the ``family=Gamma(link='log'))`` keyword.
        feature: str
            Dclab feature for which to compute the model
        """
        #: modeling method to use (e.g. "lmer")
        self.model = None
        #: dclab feature for which to perform the analysis
        self.feature = None
        #: list of [RTDCBase, column, repetition, chip_region]
        self.data = []

        self.set_options(model=model, feature=feature)

        # Make sure that lme4 is available
        if not rsetup.has_lme4():
            logger.info("Installing lme4, this may take a while!")
            rsetup.require_lme4()

    def add_dataset(self, ds, group, repetition):
        """Add a dataset to the analysis list

        Parameters
        ----------
        ds: RTDCBase
            Dataset
        group: str
            The group the measurement belongs to ("control" or
            "treatment")
        repetition: int
            Repetition of the measurement

        Raises
        ------
        ValueError
            If a dataset with the same group, repetition, and chip
            region has already been added.

        Notes
        -----
        - For each repetition, there must be a "treatment" (``1``) and a
          "control" (``0``) group.
        - If you would like to perform a differential feature analysis,
          then you need to pass at least a reservoir and a channel
          dataset (with same parameters for `group` and `repetition`).
        """
        assert group in ["treatment", "control"]
        assert isinstance(ds, RTDCBase)
        assert isinstance(repetition, numbers.Integral)

        region = ds.config["setup"]["chip region"]
        # make sure there are no doublets
        for ii, dd in enumerate(self.data):
            if dd[1] == group and dd[2] == repetition and dd[3] == region:
                raise ValueError("A dataset with group '{}', ".format(group)
                                 + "repetition '{}', and ".format(repetition)
                                 + "'{}' region has already ".format(region)
                                 + "been added (index {})!".format(ii))

        self.data.append([ds, group, repetition, region])

    def check_data(self):
        """Perform sanity checks on ``self.data``

        Raises
        ------
        ValueError
            If fewer than three datasets have been added (mixed-effects
            models need repeated measurements).
        """
        # Check that we have enough data
        if len(self.data) < 3:
            msg = "Linear mixed effects models require repeated " \
                  + "measurements. Please add more repetitions."
            raise ValueError(msg)

    def fit(self, model=None, feature=None):
        """Perform (generalized) linear mixed-effects model fit

        The response variable is modeled using two linear mixed effect
        models:

        - model: "feature ~ group + (1 + group | repetition)"
          (random intercept + random slope model)
        - the null model: "feature ~ (1 + group | repetition)"
          (without the fixed effect introduced by the "treatment" group).

        Both models are compared in R using "anova" (from the
        R-package "stats" :cite:`Everitt1992`) which performs a
        likelihood ratio test to obtain the p-Value for the
        significance of the fixed effect (treatment).

        If the input datasets contain data from the "reservoir"
        region, then the analysis is performed for the differential
        feature.

        Parameters
        ----------
        model: str (optional)
            One of:

            - "lmer": linear mixed model using lme4's ``lmer``
            - "glmer+loglink": generalized linear mixed model using
              lme4's ``glmer`` with an additional log-link function
              via ``family=Gamma(link='log'))`` :cite:`lme4`
        feature: str (optional)
            dclab feature for which to compute the model

        Returns
        -------
        results: dict
            Dictionary with the results of the fitting process:

            - "anova p-value": Anova likelihood ratio test (significance)
            - "feature": name of the feature used for the analysis
              ``self.feature``
            - "fixed effects intercept": Mean of ``self.feature`` for all
              controls; In the case of the "glmer+loglink" model, the intercept
              is already back transformed from log space.
            - "fixed effects treatment": The fixed effect size between the mean
              of the controls and the mean of the treatments relative to
              "fixed effects intercept"; In the case of the "glmer+loglink"
              model, the fixed effect is already back transformed from log
              space.
            - "fixed effects repetitions": The effects (intercept and
              treatment) for each repetition. The first axis defines
              intercept/treatment; the second axis enumerates the repetitions;
              thus the shape is (2, number of repetitions) and
              ``np.mean(results["fixed effects repetitions"], axis=1)`` is
              equivalent to the tuple (``results["fixed effects intercept"]``,
              ``results["fixed effects treatment"]``) for the "lmer" model.
              This does not hold for the "glmer+loglink" model, because
              of the non-linear inverse transform back from log space.
            - "is differential": Boolean indicating whether or not
              the analysis was performed for the differential (bootstrapped
              and subtracted reservoir from channel data) feature
            - "model": model name used for the analysis ``self.model``
            - "model converged": boolean indicating whether the model
              converged
            - "r model summary": Summary of the model
            - "r model coefficients": Model coefficient table
            - "r script": the R script used
            - "r output": full output of the R script
        """
        self.set_options(model=model, feature=feature)
        self.check_data()

        # Assemble dataset
        if self.is_differential():
            # bootstrap and compute differential features using reservoir
            features, groups, repetitions = self.get_differential_dataset()
        else:
            # regular feature analysis
            features = []
            groups = []
            repetitions = []
            for dd in self.data:
                features.append(self.get_feature_data(dd[1], dd[2]))
                groups.append(dd[1])
                repetitions.append(dd[2])

        # concatenate and populate arrays for R
        features_c = np.concatenate(features)
        # dtype=str yields a "<U1" array, so only the first character of
        # each group name ('c'/'t') is stored; the slice assignment below
        # uses groups[ii][0] accordingly.
        groups_c = np.zeros(len(features_c), dtype=str)
        repetitions_c = np.zeros(len(features_c), dtype=int)
        pos = 0
        for ii in range(len(features)):
            size = len(features[ii])
            groups_c[pos:pos+size] = groups[ii][0]
            repetitions_c[pos:pos+size] = repetitions[ii]
            pos += size

        # Run R with the given template script, substituting the
        # <...> placeholders with the serialized data arrays
        rscript = importlib_resources.read_text("dclab.lme4",
                                                "lme4_template.R")
        _, script_path = tempfile.mkstemp(prefix="dclab_lme4_", suffix=".R",
                                          text=True)
        script_path = pathlib.Path(script_path)
        rscript = rscript.replace("<MODEL_NAME>", self.model)
        rscript = rscript.replace("<FEATURES>", arr2str(features_c))
        rscript = rscript.replace("<REPETITIONS>", arr2str(repetitions_c))
        rscript = rscript.replace("<GROUPS>", arr2str(groups_c))
        script_path.write_text(rscript, encoding="utf-8")

        result = rsetup.run_command((rsetup.get_r_script_path(), script_path))

        ret_dict = self.parse_result(result)
        ret_dict["is differential"] = self.is_differential()
        ret_dict["feature"] = self.feature
        ret_dict["r script"] = rscript
        ret_dict["r output"] = result
        # sanity check: the R script must report back the model we asked for
        assert ret_dict["model"] == self.model

        return ret_dict

    def get_differential_dataset(self):
        """Return the differential dataset for channel/reservoir data

        The most famous use case is differential deformation. The idea
        is that you cannot tell what the difference in deformation
        between channel and reservoir is, because you never measure the
        same object in the reservoir and the channel. You usually just
        have two distributions. Comparing distributions is possible
        via bootstrapping. And then, instead of running the lme4
        analysis with the channel deformation data, it is run with
        the differential deformation (subtraction of the bootstrapped
        deformation distributions for channel and reservoir).

        Returns
        -------
        features, groups, repetitions: lists
            One entry per (group, repetition) pair; each feature entry
            is the bootstrapped channel-minus-reservoir distribution.
        """
        features = []
        groups = []
        repetitions = []
        # compute differential features
        for grp in sorted(set([dd[1] for dd in self.data])):
            # repetitions per groups
            grp_rep = sorted(set([dd[2] for dd in self.data if dd[1] == grp]))
            for rep in grp_rep:
                feat_cha = self.get_feature_data(grp, rep, region="channel")
                feat_res = self.get_feature_data(grp, rep, region="reservoir")
                bs_cha, bs_res = bootstrapped_median_distributions(feat_cha,
                                                                   feat_res)
                # differential feature
                features.append(bs_cha - bs_res)
                groups.append(grp)
                repetitions.append(rep)
        return features, groups, repetitions

    def get_feature_data(self, group, repetition, region="channel"):
        """Return array containing feature data

        Parameters
        ----------
        group: str
            Measurement group ("control" or "treatment")
        repetition: int
            Measurement repetition
        region: str
            Either "channel" or "reservoir"

        Returns
        -------
        fdata: 1d ndarray
            Feature data (Nans and Infs removed)

        Raises
        ------
        ValueError
            If no matching dataset was added via :func:`add_dataset`.
        """
        assert group in ["control", "treatment"]
        assert isinstance(repetition, numbers.Integral)
        assert region in ["reservoir", "channel"]
        for dd in self.data:
            if dd[1] == group and dd[2] == repetition and dd[3] == region:
                ds = dd[0]
                break
        else:
            raise ValueError("Dataset for group '{}', repetition".format(group)
                             + " '{}', and region".format(repetition)
                             + " '{}' not found!".format(region))
        # apply the dataset's current filter, then drop NaN/Inf values
        fdata = ds[self.feature][ds.filter.all]
        fdata_valid = fdata[~np.logical_or(np.isnan(fdata), np.isinf(fdata))]
        return fdata_valid

    def is_differential(self):
        """Return True if the differential feature is computed for analysis

        This effectively just checks the regions of the datasets
        and returns True if any one of the regions is "reservoir".

        See Also
        --------
        get_differential_dataset: for an explanation
        """
        for dd in self.data:
            if dd[3] == "reservoir":
                return True
        else:
            return False

    def parse_result(self, result):
        """Parse the output of the R script into a results dictionary

        The output is split into "key: value" sections at "OUTPUT"
        markers, with "#*#" delimiting trailing text within a section
        (format presumably defined by ``lme4_template.R``, which is not
        part of this module — verify against the template when editing).
        """
        resd = result.split("OUTPUT")
        ret_dict = {}
        for item in resd:
            # keep only the part before the "#*#" delimiter
            string = item.split("#*#")[0]
            key, value = string.split(":", 1)
            key = key.strip()
            value = value.strip().replace("\n\n", "\n")

            if key == "fixed effects repetitions":
                # tabular value: skip the header row, drop the row label,
                # and transpose to shape (2, number of repetitions)
                rows = value.split("\n")[1:]
                reps = []
                for row in rows:
                    reps.append([float(vv) for vv in row.split()[1:]])
                value = np.array(reps).transpose()
            elif key == "model converged":
                # R prints the boolean as "TRUE"/"FALSE"
                value = value == "TRUE"
            elif value == "NA":
                value = np.nan
            else:
                # convert numeric scalars; leave other strings as-is
                try:
                    value = float(value)
                except ValueError:
                    pass

            ret_dict[key] = value
        return ret_dict

    def set_options(self, model=None, feature=None):
        """Set analysis options

        Parameters
        ----------
        model: str or None
            Modeling method ("lmer" or "glmer+loglink");
            ``None`` leaves the current value unchanged.
        feature: str or None
            Scalar dclab feature name;
            ``None`` leaves the current value unchanged.
        """
        if model is not None:
            assert model in ["lmer", "glmer+loglink"]
            self.model = model
        if feature is not None:
            assert dfn.scalar_feature_exists(feature)
            self.feature = feature
327
+
328
+
329
def arr2str(a):
    """Convert a numpy array to a comma-separated string for R

    Integer arrays become e.g. ``"1,2,3"``, string arrays
    ``"'a','b'"``, and any other (float) arrays are formatted with
    16 significant digits.
    """
    # Bug fix: the previous check `isinstance(a.dtype.type, np.integer)`
    # was always False because `a.dtype.type` is a class, not an
    # instance. Use `np.issubdtype` for the intended dtype test.
    if np.issubdtype(a.dtype, np.integer):
        return ",".join(str(dd) for dd in a.tolist())
    elif a.dtype.type == np.str_:
        return ",".join(f"'{dd}'" for dd in a.tolist())
    else:
        return ",".join(f"{dd:.16g}" for dd in a.tolist())
337
+
338
+
339
def bootstrapped_median_distributions(a, b, bs_iter=1000, rs=117):
    """Compute the bootstrapped median distributions for two arrays.

    Parameters
    ----------
    a, b: 1d ndarray of length N
        Input data
    bs_iter: int
        Number of bootstrapping iterations to perform
        (output size).
    rs: int
        Random state seed for random number generator

    Returns
    -------
    median_dist_a, median_dist_b: 1d arrays of length bs_iter
        Boostrap distribution of medians for ``a`` and ``b``.

    See Also
    --------
    `<https://en.wikipedia.org/wiki/Bootstrapping_(statistics)>`_

    Notes
    -----
    From a programmatic point of view, it would have been better
    to implement this method for just one input array (because of
    redundant code). However, due to historical reasons (testing
    and comparability to Shape-Out 1), bootstrapping is done
    interleaved for the two arrays.
    """
    # Seed random numbers that are reproducible on different machines
    rng = np.random.RandomState(rs)
    # median accumulators, one entry per bootstrap iteration
    med_a = np.zeros(bs_iter)
    med_b = np.zeros(bs_iter)
    num_a = len(a)
    num_b = len(b)
    # NOTE: the draws for `a` and `b` are deliberately interleaved in
    # one loop so that the random-number sequence (and therefore the
    # output) stays identical to the historical implementation. If
    # this loop is still too slow, we could get rid of it and do
    # everything with arrays, at the cost of memory for arrays of
    # size len(a)*bs_iter and len(b)*bs_iter.
    for jj in range(bs_iter):
        # resample with replacement and record the medians
        idx_a = rng.randint(0, num_a, num_a)
        med_a[jj] = np.median(a[idx_a])
        idx_b = rng.randint(0, num_b, num_b)
        med_b[jj] = np.median(b[idx_b])
    return med_a, med_b