pyNIBS 0.2024.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. pyNIBS-0.2024.8.dist-info/LICENSE +623 -0
  2. pyNIBS-0.2024.8.dist-info/METADATA +723 -0
  3. pyNIBS-0.2024.8.dist-info/RECORD +107 -0
  4. pyNIBS-0.2024.8.dist-info/WHEEL +5 -0
  5. pyNIBS-0.2024.8.dist-info/top_level.txt +1 -0
  6. pynibs/__init__.py +34 -0
  7. pynibs/coil.py +1367 -0
  8. pynibs/congruence/__init__.py +15 -0
  9. pynibs/congruence/congruence.py +1108 -0
  10. pynibs/congruence/ext_metrics.py +257 -0
  11. pynibs/congruence/stimulation_threshold.py +318 -0
  12. pynibs/data/configuration_exp0.yaml +59 -0
  13. pynibs/data/configuration_linear_MEP.yaml +61 -0
  14. pynibs/data/configuration_linear_RT.yaml +61 -0
  15. pynibs/data/configuration_sigmoid4.yaml +68 -0
  16. pynibs/data/network mapping configuration/configuration guide.md +238 -0
  17. pynibs/data/network mapping configuration/configuration_TEMPLATE.yaml +42 -0
  18. pynibs/data/network mapping configuration/configuration_for_testing.yaml +43 -0
  19. pynibs/data/network mapping configuration/configuration_modelTMS.yaml +43 -0
  20. pynibs/data/network mapping configuration/configuration_reg_isi_05.yaml +43 -0
  21. pynibs/data/network mapping configuration/output_documentation.md +185 -0
  22. pynibs/data/network mapping configuration/recommendations_for_accuracy_threshold.md +77 -0
  23. pynibs/data/neuron/models/L23_PC_cADpyr_biphasic_v1.csv +1281 -0
  24. pynibs/data/neuron/models/L23_PC_cADpyr_monophasic_v1.csv +1281 -0
  25. pynibs/data/neuron/models/L4_LBC_biphasic_v1.csv +1281 -0
  26. pynibs/data/neuron/models/L4_LBC_monophasic_v1.csv +1281 -0
  27. pynibs/data/neuron/models/L4_NBC_biphasic_v1.csv +1281 -0
  28. pynibs/data/neuron/models/L4_NBC_monophasic_v1.csv +1281 -0
  29. pynibs/data/neuron/models/L4_SBC_biphasic_v1.csv +1281 -0
  30. pynibs/data/neuron/models/L4_SBC_monophasic_v1.csv +1281 -0
  31. pynibs/data/neuron/models/L5_TTPC2_cADpyr_biphasic_v1.csv +1281 -0
  32. pynibs/data/neuron/models/L5_TTPC2_cADpyr_monophasic_v1.csv +1281 -0
  33. pynibs/expio/Mep.py +1518 -0
  34. pynibs/expio/__init__.py +8 -0
  35. pynibs/expio/brainsight.py +979 -0
  36. pynibs/expio/brainvis.py +71 -0
  37. pynibs/expio/cobot.py +239 -0
  38. pynibs/expio/exp.py +1876 -0
  39. pynibs/expio/fit_funs.py +287 -0
  40. pynibs/expio/localite.py +1987 -0
  41. pynibs/expio/signal_ced.py +51 -0
  42. pynibs/expio/visor.py +624 -0
  43. pynibs/freesurfer.py +502 -0
  44. pynibs/hdf5_io/__init__.py +10 -0
  45. pynibs/hdf5_io/hdf5_io.py +1857 -0
  46. pynibs/hdf5_io/xdmf.py +1542 -0
  47. pynibs/mesh/__init__.py +3 -0
  48. pynibs/mesh/mesh_struct.py +1394 -0
  49. pynibs/mesh/transformations.py +866 -0
  50. pynibs/mesh/utils.py +1103 -0
  51. pynibs/models/_TMS.py +211 -0
  52. pynibs/models/__init__.py +0 -0
  53. pynibs/muap.py +392 -0
  54. pynibs/neuron/__init__.py +2 -0
  55. pynibs/neuron/neuron_regression.py +284 -0
  56. pynibs/neuron/util.py +58 -0
  57. pynibs/optimization/__init__.py +5 -0
  58. pynibs/optimization/multichannel.py +278 -0
  59. pynibs/optimization/opt_mep.py +152 -0
  60. pynibs/optimization/optimization.py +1445 -0
  61. pynibs/optimization/workhorses.py +698 -0
  62. pynibs/pckg/__init__.py +0 -0
  63. pynibs/pckg/biosig/biosig4c++-1.9.5.src_fixed.tar.gz +0 -0
  64. pynibs/pckg/libeep/__init__.py +0 -0
  65. pynibs/pckg/libeep/pyeep.so +0 -0
  66. pynibs/regression/__init__.py +11 -0
  67. pynibs/regression/dual_node_detection.py +2375 -0
  68. pynibs/regression/regression.py +2984 -0
  69. pynibs/regression/score_types.py +0 -0
  70. pynibs/roi/__init__.py +2 -0
  71. pynibs/roi/roi.py +895 -0
  72. pynibs/roi/roi_structs.py +1233 -0
  73. pynibs/subject.py +1009 -0
  74. pynibs/tensor_scaling.py +144 -0
  75. pynibs/tests/data/InstrumentMarker20200225163611937.xml +19 -0
  76. pynibs/tests/data/TriggerMarkers_Coil0_20200225163443682.xml +14 -0
  77. pynibs/tests/data/TriggerMarkers_Coil1_20200225170337572.xml +6373 -0
  78. pynibs/tests/data/Xdmf.dtd +89 -0
  79. pynibs/tests/data/brainsight_niiImage_nifticoord.txt +145 -0
  80. pynibs/tests/data/brainsight_niiImage_nifticoord_largefile.txt +1434 -0
  81. pynibs/tests/data/brainsight_niiImage_niifticoord_mixedtargets.txt +47 -0
  82. pynibs/tests/data/create_subject_testsub.py +332 -0
  83. pynibs/tests/data/data.hdf5 +0 -0
  84. pynibs/tests/data/geo.hdf5 +0 -0
  85. pynibs/tests/test_coil.py +474 -0
  86. pynibs/tests/test_elements2nodes.py +100 -0
  87. pynibs/tests/test_hdf5_io/test_xdmf.py +61 -0
  88. pynibs/tests/test_mesh_transformations.py +123 -0
  89. pynibs/tests/test_mesh_utils.py +143 -0
  90. pynibs/tests/test_nnav_imports.py +101 -0
  91. pynibs/tests/test_quality_measures.py +117 -0
  92. pynibs/tests/test_regressdata.py +289 -0
  93. pynibs/tests/test_roi.py +17 -0
  94. pynibs/tests/test_rotations.py +86 -0
  95. pynibs/tests/test_subject.py +71 -0
  96. pynibs/tests/test_util.py +24 -0
  97. pynibs/tms_pulse.py +34 -0
  98. pynibs/util/__init__.py +4 -0
  99. pynibs/util/dosing.py +233 -0
  100. pynibs/util/quality_measures.py +562 -0
  101. pynibs/util/rotations.py +340 -0
  102. pynibs/util/simnibs.py +763 -0
  103. pynibs/util/util.py +727 -0
  104. pynibs/visualization/__init__.py +2 -0
  105. pynibs/visualization/para.py +4372 -0
  106. pynibs/visualization/plot_2D.py +137 -0
  107. pynibs/visualization/render_3D.py +347 -0
@@ -0,0 +1,2984 @@
1
+ import os
2
+ import time
3
+ import h5py
4
+ import inspect
5
+ import warnings
6
+ import multiprocessing
7
+ import pandas as pd
8
+ import numpy as np
9
+ import scipy.stats
10
+ from lmfit import Model
11
+ from scipy.linalg import svd
12
+ from functools import partial
13
+ from scipy.stats import linregress
14
+ from collections import OrderedDict
15
+ from numpy.linalg import lstsq, pinv
16
+ from sklearn.linear_model import LinearRegression
17
+ import pynibs
18
+
19
+
20
class Element(object):
    """
    Fit Element object class.

    Bundles the per-element regression data (E-field samples ``x`` and responses
    ``y``), the lmfit ``Model`` built from ``fun``, and the fit results
    (``best_values``, ``residual``, ``score``).
    """
    def __init__(self, x, y, ele_id, fun=pynibs.expio.fit_funs.sigmoid4, score_type="R2", select_signed_data=False, constants=None,
                 **kwargs):
        """
        Initializes Fit Element instance.

        Parameters
        ----------
        x : np.ndarray of float
            (n_zaps,) Electric field value in this element, one entry per stimulation.
        y : np.ndarray of float
            (n_zaps,) Response (e.g. MEP amplitude), one entry per stimulation.
        ele_id : int
            Element index.
        fun : callable, default: pynibs.expio.fit_funs.sigmoid4
            Model function to fit (linear, exp0, sigmoid, sigmoid4, ... from fit_funs).
        score_type : str, default: "R2"
            Goodness-of-fit measure; see ``calc_score()``.
        select_signed_data : bool, default: False
            If True and ``x`` contains both signs, keep only the sign that yields the
            better initial linear fit; see ``run_select_signed_data()``.
        constants : dict, optional
            Fit parameters to be held constant (applied via ``set_constants()``).
        **kwargs
            Additional attributes attached to the instance unchanged.
        """
        self.x = x
        self.y = y
        # y_passed is the data actually handed to the optimizer
        # (log10-transformed for the *_log fit functions, see setup_model())
        self.y_passed = self.y
        self.fun = fun
        self.ele_id = ele_id
        self.init_vals = dict()  # initial parameter values
        self.random_vals_init_range = dict()  # value ranges for random restarts during refitting
        self.limits = dict()  # parameter bounds
        self.status = True  # False -> element is excluded from fitting
        self.log_scale = False
        self.select_signed_data = select_signed_data
        self.score_type = score_type
        self.score = None
        self.best_values = None
        self.r2_lin = None  # R2 of the initial linear fit (signed-data selection)
        self.gmodel = None
        self.fit = None
        self.residual = None
        self.var_y = np.var(self.y)
        self.var_y_passed = self.var_y

        self.norm_y = np.linalg.norm(self.y)
        self.param_names = None

        # Add additional parameters as object fields
        self.__dict__.update(kwargs)

        if constants is None:
            self.constants = {}
        # NOTE(review): when 'constants' is not None, self.constants is not assigned
        # here; presumably it is supplied via kwargs or set later through
        # set_constants() -- verify against callers.

        # select whether the fit is performed for positive or negative data by running an initial linear fit
        # select the data, which yields a fit with a lower p-value; removes unused x and y data
        if self.select_signed_data:
            if not ((self.x < 0).all() or (self.x > 0).all()):
                self.run_select_signed_data()

        # set initial values of the model parameters (limits, start values, restart ranges)
        self.setup_model()

    def set_init_vals(self, value):
        """
        Sets initial values in self.init_vals and gmodel instance.

        Parameters
        ----------
        value : dict
            Parameter names (keys) mapped to their initial values.
        """
        for key in value.keys():
            self.init_vals[key] = value[key]
            self.gmodel.param_hints[key]['value'] = value[key]

        # rebuild the lmfit parameters so the updated hints take effect
        self.gmodel.make_params()

    def set_limits(self, value):
        """
        Sets limits in self.limits and gmodel instance

        Parameters
        ----------
        value : dict
            Parameters (keys) to set in self as limits.
            Each value is a [min, max] pair.
        """
        for key in value.keys():
            self.limits[key] = value[key]
            self.gmodel.param_hints[key]['min'], self.gmodel.param_hints[key]['max'] = value[key]

        # rebuild the lmfit parameters so the updated hints take effect
        self.gmodel.make_params()

    def set_constants(self, value):
        """
        Sets constants in self.constants and gmodel instance.

        Parameters
        ----------
        value : dict
            Parameter names (keys) mapped to their fixed values; the parameters are
            excluded from optimization (``vary = False``).
        """
        for key in value.keys():
            self.constants[key] = value[key]
            self.gmodel.param_hints[key]['value'] = value[key]
            self.gmodel.param_hints[key]['vary'] = False

        # rebuild the lmfit parameters so the updated hints take effect
        self.gmodel.make_params()

    def setup_model(self):
        """
        Setup model parameters (limits, initial values, etc. ...).

        Builds the lmfit Model from self.fun and derives data-driven initial values,
        bounds and random-restart ranges for the respective fit function, unless they
        were already provided (e.g. by a configuration file).

        Raises
        ------
        NotImplementedError
            If self.fun is not one of the supported fit functions.
        """
        x_min = np.min(self.x)
        x_max = np.max(self.x)
        y_min = np.min(self.y)
        y_max = np.max(self.y)

        # set up gmodel
        self.gmodel = Model(self.fun)
        self.param_names = self.gmodel.param_names

        # create the param_hints OrderedDict
        for p in self.gmodel.param_names:
            self.gmodel.param_hints[p] = OrderedDict()

        # if values are not already given by the configuration file, they are set here

        if self.fun == pynibs.expio.fit_funs.linear:
            # linear function starts with generic and same values for each element
            if self.limits == {}:
                self.set_limits({"m": [-100, 100], "n": [-100, 100]})
            else:
                self.set_limits(self.limits)
            if self.init_vals == {}:
                self.set_init_vals({"m": 0.3, "n": -1})
            else:
                self.set_init_vals(self.init_vals)
            if self.random_vals_init_range == {}:
                self.random_vals_init_range = {"m": [0, 100], "n": [0, .3]}

        elif self.fun == pynibs.expio.fit_funs.exp0:
            if self.limits == {}:
                self.set_limits({"x0": [0, 1000], "r": [1e-12, 100]})
            else:
                self.set_limits(self.limits)
            if self.init_vals == {}:
                self.set_init_vals({"x0": 10, "r": .1})
            else:
                self.set_init_vals(self.init_vals)
            if self.random_vals_init_range == {}:
                self.random_vals_init_range = {"x0": [0, 10], "r": [0, .2]}

        elif self.fun in [pynibs.expio.fit_funs.sigmoid, pynibs.expio.fit_funs.sigmoid_log,
                          pynibs.expio.fit_funs.sigmoid4, pynibs.expio.fit_funs.sigmoid4_log]:
            # the *_log variants are fitted against log10(y)
            if self.fun in [pynibs.expio.fit_funs.sigmoid_log, pynibs.expio.fit_funs.sigmoid4_log]:
                self.log_scale = True
                self.y_passed = np.log10(self.y)
                self.var_y_passed = np.var(self.y_passed)

            # baseline offset y0 (only a free parameter for the 4-parameter sigmoids)
            if self.fun == pynibs.expio.fit_funs.sigmoid4_log and y_min <= 0:
                y0 = 1e-3  # keep the baseline positive for the log transform
            elif (self.fun == pynibs.expio.fit_funs.sigmoid4_log and y_min > 0) or self.fun == pynibs.expio.fit_funs.sigmoid4:
                y0 = y_min
            else:
                y0 = 0

            # orient the data range depending on the sign of the x-data
            if (self.x < 0).all():
                y1 = y_max
                y2 = y_min
                x1 = x_max
                x2 = x_min
            else:
                y1 = y_min
                y2 = y_max
                x1 = x_min
                x2 = x_max

            # set initial amp to maximum of y-data
            if "amp" not in self.init_vals:
                amp = y_max
            else:
                amp = y_max * self.init_vals["amp"]['m'] + self.init_vals["amp"]['t']

            # set initial x_0 to 3/4 of x-data range
            if "x0" not in self.init_vals:
                x0 = x1 + 3 / 4 * (x2 - x1)
            else:
                x0 = x1 + float(self.init_vals["x0"]['p']) * (x2 - x1)

            # set initial r slope to slope of tangent over middle of 25% e-range
            if "r" not in self.init_vals:
                r = 16 / (x_max - x_min) * (y2 - y1) / (amp - y0)
            else:
                r = np.square(self.init_vals["r"]['p']) / (x_max - x_min) * (y2 - y1) / (amp - y0)

            # choose factor to multiply initial values with to calculate limits
            if "limit_factor" not in self.limits:
                limit_factor = 100
            else:
                limit_factor = self.limits["limit_factor"]

            # choose factor to multiply initial values with to calculate initial value range during refitting
            if "range_factor" not in self.random_vals_init_range:
                range_factor = 3
            else:
                range_factor = self.random_vals_init_range["range_factor"]

            self.set_init_vals({"x0": x0,
                                "amp": amp,
                                "r": r})
            # min/max ordering keeps the intervals valid for negative initial values
            self.random_vals_init_range = {"x0": [np.min((0, x0 * range_factor)),
                                                  np.max((0, x0 * range_factor))],
                                           "amp": [np.min((0, amp * range_factor)),
                                                   np.max((0, amp * range_factor))],
                                           "r": [np.min((0, r * range_factor)),
                                                 np.max((0, r * range_factor))]}
            self.set_limits({"x0": [np.min((0, x0 * limit_factor)),
                                    np.max((0, x0 * limit_factor))],
                             "amp": [np.min((1e-12, amp * limit_factor)),
                                     np.max((1e-12, amp * limit_factor))],
                             "r": [np.min((1e-12, r * limit_factor)),
                                   np.max((1e-12, r * limit_factor))]})

            # additional baseline parameter y0 for the 4-parameter sigmoids
            if self.fun in [pynibs.expio.fit_funs.sigmoid4, pynibs.expio.fit_funs.sigmoid4_log]:
                if "y0" not in self.init_vals:
                    self.set_init_vals({"y0": y0})
                else:
                    self.set_init_vals({"y0": y0 * self.init_vals["y0"]['m'] + self.init_vals["y0"]['t']})

                self.random_vals_init_range["y0"] = [1e-12,
                                                     self.init_vals["y0"] * range_factor]

                if "y0" not in self.limits:
                    self.set_limits({"y0": [1e-12, y_max]})
                else:
                    self.set_limits({"y0": [self.limits["y0"]['c'], y_max * self.limits["y0"]['m']]})

        elif self.fun == pynibs.expio.fit_funs.dummy_fun:
            if "a" not in self.limits:
                self.set_limits({"a": [0, 1]})
            else:
                self.set_limits(self.limits)

            if "a" not in self.init_vals:
                self.set_init_vals({"a": 1})
            else:
                self.set_init_vals(self.init_vals)

            if "a" not in self.random_vals_init_range:
                self.random_vals_init_range["a"] = [0, 1]
        else:
            raise NotImplementedError(self.fun)

    def set_random_init_vals(self):
        """ Set random initial values, drawn uniformly from self.random_vals_init_range. """
        init_vals_new = dict()

        for p in self.init_vals.keys():
            init_vals_new[p] = np.random.rand() * \
                               (self.random_vals_init_range[p][1] - self.random_vals_init_range[p][0]) \
                               + self.random_vals_init_range[p][0]

        self.set_init_vals(init_vals_new)

    def run_select_signed_data(self):
        """
        Selects positive or negative data by performing an initial linear fit by comparing the resulting p-values,
        slopes and R2 values. Either positive or negative data (w.r.t. x-axis) yielding a fit with a p-value < 0.05,
        a positive slope and the higher R2 value is used and the remaining data with the other sign is omitted
        from the analysis
        """
        print('Running run_select_signed_data now - not all data is used!')
        mask_pos = self.x > 0
        mask_neg = np.logical_not(mask_pos)

        # fit positive data (perform the regression when we have at least 20 data points to stabilize results)
        if np.sum(mask_pos) > 20:
            s_pos = scipy.stats.linregress(x=self.x[mask_pos], y=self.y[mask_pos])
            p_pos = s_pos.pvalue
            b_pos = s_pos.slope
            residual = s_pos.slope * self.x[mask_pos] + s_pos.intercept - self.y[mask_pos]
            r2_pos = 1 - np.var(residual) / np.var(self.y[mask_pos])
        else:
            # too few samples: mark the positive side as invalid
            p_pos = 1
            r2_pos = np.NaN
            b_pos = -1

        # fit negative data (perform the regression when we have at least 20 data points to stabilize results)
        if np.sum(mask_neg) > 20:
            s_neg = scipy.stats.linregress(x=self.x[mask_neg], y=self.y[mask_neg])
            p_neg = s_neg.pvalue
            b_neg = s_neg.slope
            residual = s_neg.slope * self.x[mask_neg] + s_neg.intercept - self.y[mask_neg]
            r2_neg = 1 - np.var(residual) / np.var(self.y[mask_neg])
        else:
            # too few samples: mark the negative side as invalid
            p_neg = 1
            r2_neg = np.NaN
            b_neg = 1

        # only use data with p < 0.001 and when slopes show an increase of y-data with increasing |x|-data otherwise,
        # set status to False to indicate that the fit is omitted and set score to NaN
        pos_valid = False
        neg_valid = False

        if (b_pos > 0) and (p_pos < 0.001):
            pos_valid = True
        if (b_neg < 0) and (p_neg < 0.001):
            neg_valid = True
        if pos_valid and not neg_valid:
            self.x = self.x[mask_pos]
            self.y = self.y[mask_pos]
            self.r2_lin = r2_pos
        elif neg_valid and not pos_valid:
            self.x = self.x[mask_neg]
            self.y = self.y[mask_neg]
            self.r2_lin = r2_neg
        elif pos_valid and neg_valid:
            # both sides valid: keep the one with the higher linear R2
            if r2_pos > r2_neg:
                self.x = self.x[mask_pos]
                self.y = self.y[mask_pos]
                self.r2_lin = r2_pos
            else:
                self.x = self.x[mask_neg]
                self.y = self.y[mask_neg]
                self.r2_lin = r2_neg
        else:
            # neither side passes: exclude this element from fitting
            self.status = False
            self.score = np.NaN

        # refresh the cached statistics after the data was (possibly) reduced
        self.var_y = np.var(self.y)
        self.norm_y = np.linalg.norm(self.y)

        if self.log_scale:
            self.y_passed = np.log10(self.y)
        else:
            self.y_passed = self.y

    def run_fit(self, max_nfev=1000):
        """
        Perform data fit with lmfit.

        Parameters
        ----------
        max_nfev : int, default: 1000
            Maximum number of function evaluations passed to lmfit.
        """
        # "rho" (Spearman) is rank-based and does not require a model fit
        if self.score_type != "rho":
            fit = self.gmodel.fit(self.y_passed, x=self.x,
                                  calc_covar=False, method="leastsq", max_nfev=max_nfev, scale_covar=False)
            self.best_values = fit.best_values
            self.residual = fit.residual
        self.calc_score()

    def calc_score(self):
        """
        Determine goodness-of-fit score and store it in self.score.

        Raises
        ------
        NotImplementedError
            If self.score_type is not one of "R2", "R2_old", "SR", "rho".
        """
        # R2
        if self.score_type == "R2":
            # self.score = 1 - np.var(self.residual) / self.var_y_passed
            self.score = 1 - np.sum(self.residual**2) / (self.var_y_passed * len(self.residual))
        elif self.score_type == "R2_old":
            self.score = 1 - np.var(self.residual) / self.var_y_passed
        # Relative standard error of regression
        elif self.score_type == "SR":
            self.score = 1 - np.linalg.norm(self.residual) / self.norm_y
        elif self.score_type == "rho":
            # NOTE(review): spearmanr returns a (correlation, pvalue) result object,
            # not a scalar -- verify downstream comparisons expect this.
            self.score = scipy.stats.spearmanr(self.y_passed, b=self.x)
        else:
            raise NotImplementedError(f"{self.score_type} not implemented.")
356
+
357
def workhorse_element_run_fit(element, max_nfev=10):
    """
    Workhorse to run a single Element fit.

    Elements whose ``status`` flag is False are returned untouched (they were
    excluded from fitting, e.g. by the signed-data selection).

    Parameters
    ----------
    element : pynibs.Element
        The element to fit.
    max_nfev : int, default: 10
        Per-data-point budget of function evaluations; the total budget passed to
        ``run_fit`` is ``max_nfev * len(element.x)``.

    Returns
    -------
    element : pynibs.Element
        The same element, with fit results filled in when it was fitted.
    """
    if not element.status:
        return element

    # scale the evaluation budget with the number of data points
    total_budget = max_nfev * len(element.x)
    element.run_fit(max_nfev=total_budget)
    return element
363
+
364
+
365
def workhorse_element_init(ele_id, e_matrix, mep, fun, score_type, select_signed_data, constants, **kwargs):
    """
    Workhorse to initialize a single Element.

    Parameters
    ----------
    ele_id : int
        Index of the element; selects the column of ``e_matrix`` used as x-data.
    e_matrix : np.ndarray of float
        (n_zaps, n_ele) Electric field matrix.
    mep : np.ndarray of float
        (n_zaps,) Response values used as y-data.
    fun : callable
        Fit function handed to Element.
    score_type : str
        Goodness-of-fit measure handed to Element.
    select_signed_data : bool
        Whether Element should restrict the fit to one sign of the x-data.
    constants : dict or None
        Fit constants handed to Element.
    **kwargs
        Forwarded to Element unchanged.

    Returns
    -------
    element : pynibs.Element
        The freshly initialized element.
    """
    # x-data for this element is the corresponding column of the E-field matrix
    x_data = e_matrix[:, ele_id]
    return Element(x=x_data,
                   y=mep,
                   ele_id=ele_id,
                   fun=fun,
                   score_type=score_type,
                   select_signed_data=select_signed_data,
                   constants=constants,
                   **kwargs)
376
+
377
+
378
def regress_data(e_matrix, mep, elm_idx_list=None, element_list=None, fun=pynibs.expio.fit_funs.sigmoid4, n_cpu=4, con=None,
                 n_refit=50, zap_idx=None, return_fits=False, score_type="R2",
                 verbose=False, pool=None, refit_discontinuities=True, select_signed_data=False,
                 mp_context="fork", **kwargs):
    """
    Mass-univariate nonlinear regressions on raw MEP_{AMP} ~ E.
    That is, for each element in elm_idx_list, it's E (mag | norm | tan) for each zap regressed on the raw MEP
    amplitude. An element wise R2 score is returned.
    The function reads the precomputed array of E-MEP data from an .hdf5 file.

    Parameters
    ----------
    e_matrix : np.ndarray of float
        (n_zaps, n_ele) Electric field matrix.
    mep : np.ndarray of float
        (n_zaps,) Motor evoked potential for each stimulation.
    elm_idx_list : np.ndarray of int or list of int
        (n_zaps,) List containing the element indices the fit is performed for.
    element_list : list of Element object instances, optional
        [n_ele] pynibs.Element objects to skip initialization here.
    fun : pynibs.exp.Mep, default: pynibs.sigmoid4
        A pynibs.exp.Mep function (exp0, sigmoid, sigmoid4, ...).
    n_cpu : int, default: 4
        Number of threads, if n_cpu=1 no parallel pool will be opened and all calculations are done in serial.
    con : np.ndarray of float, optional
        (n_ele, 3 or 4) Connectivity matrix of ROI. Needed in case of refit for discontinuity checks.
    n_refit : int, default: 50
        Maximum number of refits of zero elements. No refit is applied in case of n_refit = 0.
    zap_idx : np.ndarray of int or list of int, optional
        Which e/mep pairs to use.
    return_fits : bool, optional
        Return fit objects containing the parameter estimates.
    score_type : str, default: "R2"
        Error measure of fit:

        * "R2": R2 score (Model variance / Total variance); linear fits: [0, 1], 1 ... perfect fit
        * "SR": Relative standard error of regression (1 - Error 2-norm / Data 2-norm); [-Inf, 1], 1 ... perfect fit
        * "rho": Spearman correlation coefficient [-1, 1]; finds any monotonous correlation (0 means no correlation)
    verbose : bool, default: False
        Plot output messages.
    pool : multiprocessing.Pool()
        pool instance to use.
    refit_discontinuities : bool, default: True
        Refit discontinuous elements. If True, provide _con_.
    mp_context : str, default: "fork"
        Controls the method the sub-processes of the multiprocessing pool (in case of n_cpu > 1) are launched.

        * fork: (only supported by Unix) mp processes diverge from the main process,
          the entire stack, variables and other resources are copied over.
          From the docs: "The child process, when it begins, is effectively identical to the parent process.
          All resources of the parent are inherited by the child process. Note that safely forking a
          multithreaded process is problematic."
        * spawn: (supported by Window and Unix) mp processes are launched in an entirely new Python interpreter
          as separate processes. Variables are copied other resources are freshly instantiated.
          From the docs: "In particular, unnecessary file descriptors and handles from the parent process
          will not be inherited. Starting a process using this method is rather slow compared to using
          fork or forkserver."
    **kwargs
        Passed on to pynibs.Element() to set fit parameters.

    Returns
    -------
    score : np.ndarray of float
        (n_roi, n_qoi) Score for each element.
    best_values : list of dict
        (n_ele) List of parameter fits. Only returned if return_fits=True.
    """
    # default: use all stimulations
    if zap_idx is None:
        zap_idx = np.array(range(e_matrix.shape[0]))
    if isinstance(zap_idx, list):
        zap_idx = np.array(zap_idx)

    if refit_discontinuities:
        assert con is not None, f"Provide 'con' parameter to fit discontinuties"
    # elements with a score below this threshold are considered "zero" and refitted
    refit_thr = 1e-6
    constants = None

    # default: fit all elements
    if elm_idx_list is None:
        elm_idx_list = np.arange(e_matrix.shape[1])

    # dummy function: no fitting at all, return random scores (used for testing)
    if fun == pynibs.expio.fit_funs.dummy_fun:
        c_all = np.random.random(len(elm_idx_list))

        if return_fits:
            best_values = [{"a": 1} for _ in range(len(elm_idx_list))]
            return c_all, best_values
        else:
            return c_all

    # shuffle elements because some of them need longer to compute
    # (in this way it is distributed more equally over all cores)
    # NOTE(review): shuffles the caller's array in place; the order is restored by
    # the sort before the discontinuity check below.
    np.random.shuffle(elm_idx_list)

    # Setting up parallelization
    ####################################################################
    if n_cpu > 1:
        if not pool:
            n_cpu_available = multiprocessing.cpu_count()
            n_cpu = min(n_cpu, n_cpu_available, len(elm_idx_list))
            pool = multiprocessing.get_context(mp_context).Pool(n_cpu)
            local_pool = True

            if verbose:
                print(" > Setting up multiprocessing using {}/{} cores".format(n_cpu, n_cpu_available))
        else:
            local_pool = False  # close pool only if created locally
            if verbose:
                print(" > Using provided pool object")

        # defining workhorses
        workhorse_partial = partial(workhorse_element_run_fit,
                                    max_nfev=10)

        workhorse_init_partial = partial(workhorse_element_init,
                                         e_matrix=e_matrix[zap_idx],
                                         mep=mep[zap_idx],
                                         fun=fun,
                                         score_type=score_type,
                                         select_signed_data=select_signed_data,
                                         constants=constants,
                                         **kwargs)
    else:
        local_pool = False
        if verbose:
            print(" > Running computations with n_cpu=1.")

    # initialize elements
    ####################################################################
    if element_list is None:
        start = time.time()
        if n_cpu <= 1:
            # serial initialization
            element_list = [Element(x=e_matrix[zap_idx][:, ele_id],
                                    y=mep[zap_idx],
                                    ele_id=ele_id,
                                    fun=fun,
                                    score_type=score_type,
                                    select_signed_data=select_signed_data,
                                    constants=constants,
                                    **kwargs
                                    ) for ele_id in elm_idx_list]

        else:
            element_list = pool.map(workhorse_init_partial, elm_idx_list)
        stop = time.time()
        if verbose:
            print(f"Initialized {len(elm_idx_list)} elements: {stop - start:2.2f} s")

    # run fit
    ####################################################################
    start = time.time()

    if n_cpu <= 1:
        for ele in element_list:
            ele.run_fit(max_nfev=10 * len(ele.x))
    else:
        element_list = pool.map(workhorse_partial, element_list)

    stop = time.time()

    if verbose:
        print(f"Determined scores: {stop - start:2.2f} s")

    # linear fits are deterministic enough that refitting gains nothing
    if fun == pynibs.expio.fit_funs.linear:
        n_refit = 0
        refit_discontinuities = False
        print("Skipping refit for linear fits ...")

    # refit elements
    ####################################################################
    if n_refit > 0:

        # refit bad elements
        ####################################################################
        i_refit = 0
        while i_refit < n_refit:

            # get index in Element_list of elements where refit should be performed
            ele_idx_refit = [i_ele for i_ele, ele in enumerate(element_list) if ele.score < refit_thr]
            element_list_refit = [element_list[i_ele] for i_ele in ele_idx_refit]

            if len(element_list_refit) > 0:

                if verbose:
                    print(f" > Performing refit for {len(element_list_refit)} zero elements ...")

                # set random start values
                for ele in element_list_refit:
                    ele.set_random_init_vals()

                start = time.time()

                if n_cpu == 1:
                    for ele in element_list_refit:
                        ele.run_fit(max_nfev=10 * len(ele.x))
                else:
                    element_list_refit = pool.map(workhorse_partial, element_list_refit)

                stop = time.time()

                if verbose:
                    print(f"Determined scores: {stop - start:2.2f} s")

                # replace new fits if they have higher scores than the old ones
                for i_ele, ele_idx_re in enumerate(ele_idx_refit):
                    if element_list_refit[i_ele].score > element_list[ele_idx_re].score:
                        element_list[ele_idx_re] = element_list_refit[i_ele]

                i_refit += 1
            else:
                # no more elements below the threshold: stop early
                break

    # sort elements for discontinuity check (undo the shuffle above)
    element_list = [ele for _, ele in sorted(zip(elm_idx_list, element_list))]

    # find discontinuities and refit
    ##################################################################
    if refit_discontinuities:

        if len(element_list) > 1:
            score = np.array([ele.score for ele in element_list])
            not_fitted_elms = np.array([idx for idx, ele in enumerate(element_list) if np.isnan(ele.score)])
            idx_disc, idx_neighbor = pynibs.get_indices_discontinuous_data(data=score,
                                                                           con=con,
                                                                           neighbor=True,
                                                                           deviation_factor=2,
                                                                           not_fitted_elms=not_fitted_elms)
            element_list_disc = [element_list[i_ele] for i_ele in idx_disc]

            if len(idx_disc) > 0:
                if verbose:
                    print(f" > Performing refit for {len(idx_disc)} discontinuous elements ...")

            # refit for discontinuous elements
            if len(idx_disc) > 0:
                # set start values from neighbors
                for i_ele, idx_ne in zip(range(len(idx_disc)), idx_neighbor):
                    element_list_disc[i_ele].set_init_vals(element_list[idx_ne].best_values)

                start = time.time()

                if n_cpu == 1:
                    for ele in element_list_disc:
                        ele.run_fit(max_nfev=10 * len(ele.x))
                else:
                    element_list_disc = pool.map(workhorse_partial, element_list_disc)

                stop = time.time()

                if verbose:
                    print(f"Determined scores: {stop - start:2.2f} s")

                # replace new fits if they have higher scores than the old ones
                for i_ele, ele_idx_re in enumerate(idx_disc):
                    if element_list_disc[i_ele].score > element_list[ele_idx_re].score:
                        element_list[ele_idx_re] = element_list_disc[i_ele]

    # collect final scores; clean up the pool only if it was created here
    score = np.array([ele.score for ele in element_list])
    if local_pool:
        pool.close()
        pool.join()

    if return_fits:
        best_values = np.array([ele.best_values for ele in element_list])
        return score, best_values
    else:
        return score
644
+
645
+
646
+ def sing_elm_raw(elm_idx_list, mep_lst, mep_params, e, alpha=1000):
647
+ """
648
+ Mass-univariate ridge regressions on raw MEP_{AMP} ~ E.
649
+ That is, for each element in elm_idx_list, it's E (mag | norm | tan) for each zap regressed on the raw MEP
650
+ amplitude. An element wise sklearn.metrics.regression.r2_score is returned.
651
+
652
+ elm_idx_list : np.ndarray
653
+ (chunksize) List of element indices, the congruence factor is computed for.
654
+ mep: list of Mep object instances
655
+ (n_cond) List of fitted Mep object instances for all conditions (see exp.py for more information of Mep class).
656
+ mep_params: np.ndarray of float
657
+ (n_mep_params_total) List of all mep parameters of curve fits used to calculate the MEP (accumulated into 1
658
+ array) (e.g.: [mep_#1_para_#1, mep_#1_para_#2, mep_#1_para_#3, mep_#2_para_#1, mep_#2_para_#1, ...])
659
+ e: np.ndarray of float
660
+ (n_elm, n_cond, n_qoi) array of the electric field to compute the r2 factor for, e.g. (e_mag, e_norm, e_tan).
661
+
662
+ Returns
663
+ -------
664
+ r2: np.ndarray of float
665
+ (n_roi, n_datasets) R^2 for each element in elm_idx_list.
666
+ """
667
+ from pandarallel import pandarallel
668
+ pandarallel.initialize(verbose=0)
669
+
670
+ def cartesian_product(*arrays):
671
+ """
672
+ Fast implementation to get cartesian product of two arrays.
673
+
674
+ cartesian_product([a,b,c],[2,3]) =
675
+ [a, 2
676
+ a, 3
677
+ b, 2
678
+ b, 3
679
+ c, 2
680
+ c, 3]
681
+ """
682
+ la = len(arrays)
683
+ dtype = np.result_type(*arrays)
684
+ arr = np.empty([len(a) for a in arrays] + [la], dtype=dtype)
685
+ for i, a in enumerate(np.ix_(*arrays)):
686
+ arr[..., i] = a
687
+ return arr.reshape(-1, la)
688
+
689
+ n_eqoi = e.shape[2]
690
+ n_cond = e.shape[1]
691
+ n_elm = e.shape[0]
692
+ assert n_cond == len(mep_lst)
693
+ scores = None
694
+ reg_r2 = np.empty((n_elm, n_eqoi))
695
+
696
+ for qoi_idx in range(n_eqoi):
697
+ # t_q = time.time()
698
+ x = pd.DataFrame()
699
+ index_shift = 0
700
+ amplitudes = np.array(())
701
+
702
+ start = time.time()
703
+ for mep_i, mep in enumerate(mep_lst):
704
+ # condition wise, as we stimulated with different intensities per conditions
705
+
706
+ # for each element in roi, one datapoint for each zap.
707
+ current = cartesian_product(e[:, mep_i, qoi_idx], mep.intensities)
708
+
709
+ # index is iteration of zaps over all conditions
710
+ index = cartesian_product(e[:, mep_i, qoi_idx], np.arange(len(mep.intensities)))[:, 1]
711
+ index += index_shift
712
+ index_shift = index[-1] + 1
713
+
714
+ # el is range(n_elements) * n_zaps_in_condition
715
+ el_idx = np.repeat(np.arange(e.shape[0]), len(mep.intensities))
716
+
717
+ # intensity * e
718
+ e_zap = np.multiply(current[:, 0], current[:, 1])
719
+
720
+ # put all together
721
+ x_cond = pd.DataFrame(data={"index": index.astype(int),
722
+ "el": el_idx,
723
+ "e": e_zap})
724
+ amplitudes = np.append(amplitudes, mep.mep)
725
+ # "current": current[:, 1],
726
+ # "mep": mep[:,1]})
727
+
728
+ # x_cond['condition'] = mep_i
729
+ x = x.append(x_cond)
730
+
731
+ stop = time.time()
732
+ print(f"Prepare dataset t = {stop - start}")
733
+ # x.shape is now (n_zaps*n_elms, 3)
734
+
735
+ # reshape to (n_zaps, n_elms)
736
+ start = time.time()
737
+ x = x.pivot(index="index", columns="el", values="e")
738
+ stop = time.time()
739
+ print(f"Pivot t = {stop - start}")
740
+
741
+ do_reg_poly = False
742
+ do_reg_linear = True
743
+ reg = LinearRegression()
744
+
745
+ if do_reg_poly:
746
+ # fn = "/data/pt_01756/tmp/reg/mean_data_roi_lasso.hdf5"
747
+ # print "creating polynomial features"
748
+ # poly = PolynomialFeatures(2, interaction_only=True, include_bias=False)
749
+ # x_pol = poly.fit_transform(x.iloc[:,:-3])
750
+ # print "fitting regressor"
751
+ # reg.fit(x_pol, x['mep'])
752
+ # with h5py.File(fn, 'a') as f:
753
+ # f.create_dataset(name='/data/tris/' + e,
754
+ # data=reg.coef_[:E.shape[0]])
755
+ # e_poly = []
756
+ # # get interaction mapped back to elemens
757
+ # for el in range(E.shape[0]):
758
+ # print "getting indices for interactions"
759
+ # idx = [i for i, s in enumerate(poly.get_feature_names()) if 'x{} '.format(el + 1) in s]
760
+ # e_poly.append(np.sum(reg.coef_[idx]))
761
+ # f.create_dataset(name='/data/tris/' + e + '_poly',
762
+ # data=e_poly)
763
+ # data_qoi_tmp = e_poly
764
+ raise NotImplementedError
765
+
766
+ elif do_reg_linear:
767
+ # Do one regression per element.
768
+ # r_t = time.time()
769
+
770
+ def get_score(x_i):
771
+ """Helper function do be used by pd.apply() to speed up things.
772
+
773
+ Paramsmeters
774
+ ------------
775
+ x_i: pd.Series
776
+ Column with e for a single elm.
777
+
778
+ Returns
779
+ -------
780
+ r2 for amplitudes ~ E
781
+ """
782
+ # x_i = x_i.reshape(-1, 1)
783
+ reg.fit(x_i.reshape(-1, 1), amplitudes)
784
+ return reg.score(x_i.reshape(-1, 1), amplitudes)
785
+
786
+ # apply get_score function column wise
787
+ start = time.time()
788
+ # scores = x.parallel_apply(get_score, axis=0, raw=True)
789
+ scores = x.transpose().swifter.apply(get_score, axis=1, raw=True)
790
+ stop = time.time()
791
+ print(f"Fit and score t = {stop - start}")
792
+ # print "all_reg: {}".format(time.time() - r_t)
793
+
794
+ reg_r2[:, qoi_idx] = np.array(scores)
795
+ # data.append(data_qoi_tmp)
796
+
797
+ # print "qoi {}: {}".format(qoi_idx, time.time() - t_q)
798
+
799
+ return reg_r2
800
+
801
+
802
def write_regression_hdf5(fn_exp_hdf5, fn_reg_hdf5, qoi_path_hdf5, qoi_phys, e_results_folder, qoi_e, roi_idx,
                          conds_exclude):
    """
    Precompute and store intensity-scaled E-field / QOI regression data.

    Reads the stimulation intensities from the experiment.hdf5 file.
    Reads the qoi from the experiment.hdf5 file.
    Reads the electric fields from the electric field folder.
    Weights the electric field voxel wise with the respective intensities
    and writes an .hdf5 file containing the preprocessed data (pandas dataframe).

    Parameters
    ----------
    fn_exp_hdf5 : str
        Filename of the experiment.hdf5 file.
    fn_reg_hdf5 : str
        Filename of output regression.hdf5 file.
    qoi_path_hdf5 : str
        Path in experiment.hdf5 file pointing to the pandas dataframe containing the qoi.
        (e.g.: "phys_data/postproc/EMG")
    qoi_phys : str
        Name of QOI the congruence factor is calculated with.
        (e.g.: "p2p")
    e_results_folder : str
        Folder containing the electric fields.
        (e.g.: "/data/pt_01756/probands/13061.30/results/electric_field/1")
    qoi_e : str or list of str
        Quantities of the electric field used to calculate the congruence factor (e.g. ["E", "E_norm", "E_tan"]).
        Has to be included in e.hdf5 -> e.g.: "data/midlayer/roi_surface/1/E".
    roi_idx : int
        ROI index.
    conds_exclude : str or list of str
        Conditions to exclude.

    Returns
    -------
    <File>: .hdf5 file
        File containing the intensity (current) scaled E-fields of the conditions in the ROI.
        Saved in datasets with the same name as qoi_e ["E", "E_norm", "E_tan"]
    """
    def _cartesian_product(*arrays):
        """
        Fast implementation to get cartesian product of 1d arrays.
        Returns an (prod(lengths), n_arrays) array, e.g.
        _cartesian_product([a,b,c],[2,3]) = [[a,2],[a,3],[b,2],[b,3],[c,2],[c,3]].
        """
        la = len(arrays)
        dtype = np.result_type(*arrays)
        arr = np.empty([len(a) for a in arrays] + [la], dtype=dtype)
        for i, a in enumerate(np.ix_(*arrays)):
            arr[..., i] = a
        return arr.reshape(-1, la)

    # BUGFIX: the original `conds_exclude is not list` compared the argument with the
    # type object `list` (virtually always True), so list arguments were nested into
    # [[...]] and the exclusion below silently matched nothing.
    if not isinstance(conds_exclude, list):
        conds_exclude = [conds_exclude]

    if not isinstance(qoi_e, list):
        qoi_e = [qoi_e]

    # read dataframe of stimulation data
    df_stim_data = pd.read_hdf(fn_exp_hdf5, "stim_data")  # type: pd.DataFrame
    conds = np.unique(df_stim_data["condition"])
    conds = [c for c in conds if c not in conds_exclude]

    # read dataframe of postproc containing the qoi
    df_qoi = pd.read_hdf(fn_exp_hdf5, qoi_path_hdf5)  # type: pd.DataFrame

    n_conds = len(conds)
    n_qoi_e = len(qoi_e)
    n_ele = 0
    e = None

    # read electric fields condition-wise
    for i_c, c in enumerate(conds):
        fn_e_hdf5 = os.path.join(e_results_folder, c, 'simulations', 'e.hdf5')
        print(" > Loading electric field from file: {}".format(fn_e_hdf5))

        # context manager so the file handle is closed again (original leaked it)
        with h5py.File(fn_e_hdf5, 'r') as f:
            # generate E array in first iteration, once the element count is known
            if i_c == 0:
                n_ele = f[f"data/midlayer/roi_surface/{roi_idx}/{qoi_e[0]}"][:].shape[0]

                # np.array [n_ele x n_conds x n_qoi]
                e = np.zeros((n_ele, n_conds, n_qoi_e))

            # midlayer E
            for i_q_e, q_e in enumerate(qoi_e):
                e[:, i_c, i_q_e] = f[f"data/midlayer/roi_surface/{roi_idx}/{q_e}"][:]

    print("\n")

    # scale electric fields with respective intensities
    for i_q_e, q_e in enumerate(qoi_e):
        print(f"Preparing regression datasets for {q_e}")
        print(f"========================================")
        x_parts = []  # collect per-condition frames, concatenate once at the end
        index_shift = 0
        qoi_amplitudes = np.array(())

        start = time.time()
        for i_c, c in enumerate(conds):
            # extract stimulation intensity and qoi amplitude for condition
            mask = df_stim_data["condition"].values == c
            stim_intensity = df_stim_data.loc[mask, "current"].values

            # for each element in roi, one datapoint for each zap.
            e_stim_intensity = _cartesian_product(e[:, i_c, i_q_e], stim_intensity)

            # intensity * e
            e_scaled = np.multiply(e_stim_intensity[:, 0], e_stim_intensity[:, 1])

            # index is iteration of zaps over all conditions
            index = _cartesian_product(e[:, i_c, i_q_e], np.arange(len(stim_intensity)))[:, 1]
            index += index_shift
            index_shift = index[-1] + 1

            # el is range(n_elements) * n_zaps_in_condition
            el_idx = np.repeat(np.arange(n_ele), len(stim_intensity))

            # put all together
            x_parts.append(pd.DataFrame(data={"index": index.astype(int),
                                              "el": el_idx,
                                              "e": e_scaled}))

            qoi_amplitudes = np.append(qoi_amplitudes, df_qoi.loc[mask, qoi_phys].values)

        # single concat instead of repeated DataFrame.append:
        # .append() was removed in pandas >= 2.0 and is quadratic in the number of conditions
        x = pd.concat(x_parts, ignore_index=True)

        stop = time.time()
        print(f"Prepare dataset: t = {stop - start}")
        # x.shape is now (n_zaps*n_elms, 3)

        # reshape to (n_zaps, n_elms)
        start = time.time()
        x = x.pivot(index="index", columns="el", values="e")
        x["qoi_amplitudes"] = qoi_amplitudes
        stop = time.time()
        print(f"Pivot: t = {stop - start}")

        start = time.time()
        x.to_hdf(fn_reg_hdf5, q_e)
        stop = time.time()
        print(f"Write hdf5: t = {stop - start}")
        print(f"\n")
949
+
950
+
951
def ridge_from_hdf5(elm_idx_list, fn_reg_hdf5, qoi_path_hdf5, zap_idx=None):
    """
    Mass-univariate regressions on raw MEP_{AMP} ~ E, element by element.

    For every element in ``elm_idx_list`` the (intensity-scaled) E value of each zap is
    regressed on the raw MEP amplitude and the element-wise
    sklearn.metrics.regression.r2_score is returned.
    The precomputed E-MEP array is read from an .hdf5 file.
    Always uses all cores of a machine!

    Parameters
    ----------
    elm_idx_list : list of int
        Element indices the fit is performed for.
    fn_reg_hdf5 : str
        Filename (incl. path) containing the precomputed E-MEP dataframes.
    qoi_path_hdf5 : str
        Path in .hdf5 file to dataset of electric field qoi.
    zap_idx : np.ndarray, optional
        (n_zaps) Indices of zaps the congruence factor is calculated with (default: all).

    Returns
    -------
    r2 : np.ndarray of float
        (n_roi, n_datasets) R^2 for each element in elm_idx_list.
    """
    data = pd.read_hdf(fn_reg_hdf5, qoi_path_hdf5)  # type: pd.DataFrame

    # default: take every stimulation into account
    if zap_idx is None:
        zap_idx = np.arange(data.shape[0])

    # boolean row selector over all zaps
    row_mask = np.zeros(data.shape[0], dtype=bool)
    row_mask[zap_idx] = True

    amplitudes = data.loc[row_mask, "qoi_amplitudes"].values
    data = data.drop("qoi_amplitudes", axis=1)

    model = LinearRegression()

    def _score_column(col):
        """Fit amplitudes ~ E for one element's column and return its r2 score."""
        col_2d = col.reshape(-1, 1)
        model.fit(col_2d, amplitudes)
        return model.score(col_2d, amplitudes)

    # apply the scoring helper column-wise (one column per element)
    scores = data.loc[row_mask, elm_idx_list].apply(_score_column, axis=0, raw=True)

    return np.array(scores)[:, np.newaxis]
1010
+
1011
+
1012
def fit_elms(elm_idx_list, e_matrix, mep, zap_idx=None,
             fun=pynibs.expio.fit_funs.sigmoid4, init_vals=None, limits=None, log_scale=False,
             constants=None, max_nfev=None, bad_elm_idx=None, score_type="R2",  # mask_e_field=None,
             verbose=False):
    """
    Workhorse for Mass-univariate nonlinear regressions on raw MEP_{AMP} ~ E.
    That is, for each element in elm_idx_list, it's E (mag | norm | tan) for each zap regressed on the raw MEP
    amplitude. An element wise r2 score is returned.

    Parameters
    ----------
    elm_idx_list : list of int or np.ndarray
        List containing the element indices the fit is performed for.
    e_matrix : np.ndarray of float
        (n_zaps, n_ele) Electric field matrix.
    mep : np.ndarray of float
        (n_zaps) Motor evoked potentials for every stimulation.
    zap_idx : np.ndarray, optional
        (n_zaps) Indices of zaps the congruence factor is calculated with (default: all).
    fun : str
        A function name of pynibs.exp.Mep (exp0, sigmoid).
    init_vals : np.ndarray of dict
        (len(elm_idx_list)) Dictionary containing the initial values for each element as np.ndarray.
        The keys are the free parameters of fun, e.g. "x0", "amp", etc.
        NOTE(review): despite the default of None, the body unconditionally calls
        init_vals.keys() below — passing None raises AttributeError; verify callers.
    limits : pd.DataFrame
        Dictionary containing the limits of each parameter for each element e.g.: limits["x0"][elm_idx] = [min, max].
    log_scale : bool, default: False
        Log-transform data before fit (necessary for functions defined in the log domain).
    constants : dict of <string>:<num>, optional
        key:value pair of model parameters not to optimize.
    max_nfev : int, optional
        Max fits, passed to model.fit() as max_nfev=max_nfev*len(x).
    bad_elm_idx : np.ndarray, optional
        Indices of elements not to fit, with indices corresponding to indices (not values) of elm_idx_list.
    score_type : str, default: "R2"
        Goodness of fit measure; Choose SR for nonlinear fits and R2 or SR for linear fits:

        * "R2": R2 score (Model variance / Total variance) [0, 1] for linear models; 0: bad fit; 1: perfect fit
        * "SR": Relative standard error of regression (1 - Error 2-norm / Data 2-norm) [-inf, 1]; 1: perfect fit
    verbose : bool, default: False
        Print verbosity information

    Returns
    -------
    r2 : np.ndarray of float
        (n_roi, 1) R2 for each element in elm_idx_list.
    best_values : np.ndarray of object
        Fit parameters returned from the optimizer.
    """
    # prefix log lines with the worker id when running inside a multiprocessing pool;
    # _identity is empty in the main process, hence the (broad) except fallback to "Main"
    str_pref = "Main"
    start = time.time()
    try:
        str_pref = f"{multiprocessing.current_process()._identity[0]:0>2} "
    except:
        pass

    # use all stimuli if zap_idx is not provided
    if zap_idx is not None:
        e_matrix = e_matrix[zap_idx, :]
        mep = mep.iloc[zap_idx]
    n_zaps = e_matrix.shape[0]

    # empty set -> no element is skipped
    if bad_elm_idx is None:
        bad_elm_idx = set(np.array([]))

    # placeholder list; replaced by a dict of per-parameter lists further below
    best_values = [0] * len(elm_idx_list)
    # sentinel -10 marks elements that were never fitted
    r2 = np.zeros((len(elm_idx_list),)) - 10

    # dummy function: short-circuit without fitting anything
    if fun == pynibs.expio.fit_funs.dummy_fun:
        return r2, np.array(best_values)

    # lmfit interprets max_nfev as an absolute budget -> scale by the number of data points
    if max_nfev is not None:
        max_nfev = n_zaps * max_nfev

    # set up gmodel
    gmodel = Model(fun)

    # create the param_hints ordereddict only once; the per-element loops below
    # mutate these dicts in place instead of calling set_param_hint (much faster)
    for p in gmodel.param_names:
        gmodel.param_hints[p] = OrderedDict()

    # bind to locals for faster lookup in the hot per-element loop
    param_hints = gmodel.param_hints
    make_params = gmodel.make_params
    param_names = gmodel.param_names

    if limits is not None:
        limits_p = list(limits.columns)
    else:
        limits_p = set()

    if init_vals is not None:
        inits_p = list(init_vals.columns)
        if constants is not None:
            # parameters held constant must not get (varying) initial values
            inits_p = np.setdiff1d(inits_p, list(constants.keys()))
    else:
        inits_p = set()

    # constants may be scalar (same for all elements) or array-like (element-wise);
    # len() succeeding distinguishes the two cases
    constants_elmwise = {}
    if constants is not None:
        for p in param_names:
            if p in constants:
                param_hints[p]['vary'] = False

                try:
                    len(constants[p])
                    constants_elmwise[p] = constants[p]
                except TypeError:
                    param_hints[p]['value'] = constants[p]

    # transform mep to log domain
    if log_scale:
        qoi = np.log10(mep)
    else:
        qoi = mep

    # precompute denominators of the goodness-of-fit scores once
    qoi_norm = np.linalg.norm(qoi)
    qoi_var = np.var(qoi)
    best_values = {k: [] for k in init_vals.keys()}
    # loop over elements
    for i, elm_idx in enumerate(elm_idx_list):
        if i in bad_elm_idx:
            # keep list lengths consistent: append a 0 for every parameter of skipped elements
            [best_values[k].append(0) for k, v in best_values.items()]
            continue
        for p in limits_p:
            # set limits
            # skip the set_param_hint function and do this by hand
            param_hints[p]['min'], param_hints[p]['max'] = limits.iloc[i][p]

        for p in inits_p:
            # set initial values
            param_hints[p]['value'] = init_vals.iloc[i][p]

        for p in constants_elmwise:
            param_hints[p]['value'] = constants_elmwise[p][i]
        # rebuild Parameters from the mutated hints before fitting
        make_params()

        # perform fit (for some reason, sigmoid4_log function generates NaN, which I can not reproduce)
        # I catch the fitting error (ValueError) and set an r2 score of -1, such that it will go into the refit

        e_elm = e_matrix[:, elm_idx]
        # set max_nfev to a reasonable range
        fit = gmodel.fit(qoi, x=e_elm,
                         calc_covar=False, method="leastsq", max_nfev=max_nfev, scale_covar=False)
        [best_values[k].append(v) for k, v in fit.best_values.items()]

        # calculate goodness-of-fit score
        if score_type == "R2":
            # "R2"
            r2[i] = 1 - np.var(fit.residual) / qoi_var
        elif score_type == "SR":
            # Relative standard error of regression
            r2[i] = 1 - np.linalg.norm(fit.residual) / qoi_norm  # TODO: what is this??
            # np.sqrt(np.sum(fit.residual**2) / len(mep))  # <- this should be the correct S
            # print(f"Diff: "
            #       f"{np.round((1 - np.linalg.norm(fit.residual) / qoi_norm) -
            #       np.sqrt(np.sum(fit.residual**2) / len(mep)),2)}")
        elif score_type == 'BIC':
            # sign flipped so that "larger is better" like the other scores
            r2[i] = -fit.bic
        elif score_type == 'AIC':
            # NOTE(review): AIC is returned unnegated, unlike BIC — confirm this asymmetry is intended
            r2[i] = fit.aic
        else:
            raise ValueError(f"Error score '{score_type}' not implemented ... ")
    if verbose:
        if len(elm_idx_list) == 0:
            elm_time = 0.0
        else:
            elm_time = (time.time() - start) / len(elm_idx_list)

        print(f"Proc{str_pref}: > fit_elms workhorse: done "
              f"({time.time() - start:.2f} s, {elm_time:2.4} s/elm, "
              f"mean R2: {np.mean(r2):2.2f})")
    return r2, pd.DataFrame().from_dict(best_values)
1184
+
1185
+
1186
+ # def nl_hdf5_workhorse_idx(elm_idx_list, e_matrix, mep,
1187
+ # fun=sigmoid4, zap_idx=None, init_vals=None, limits=None, log_scale=False,
1188
+ # constants=None, max_nfev=None,
1189
+ # verbose=False):
1190
+ # """
1191
+ # Workhorse for Mass-univariate nonlinear regressions on raw MEP_{AMP} ~ E.
1192
+ # That is, for each element in elm_idx_list, it's E (mag | norm | tan) for each zap regressed on the raw MEP
1193
+ # amplitude. An element wise r2 score is returned.
1194
+ # The function reads the precomputed array of E-MEP data from an .hdf5 file.
1195
+ #
1196
+ # Parameters
1197
+ # ----------
1198
+ # elm_idx_list : list of int
1199
+ # List containing the element indices the fit is performed for
1200
+ # e_matrix : np.ndarray of float [n_zaps x n_ele]
1201
+ # Electric field matrix
1202
+ # mep : np.ndarray of float [n_zaps]
1203
+ # Motor evoked potentials for every stimulation
1204
+ # zap_idx : np.array [n_zaps], default: None
1205
+ # Indices of zaps the congruence factor is calculated with (default: all)
1206
+ # fun : str
1207
+ # A function name of pynibs.exp.Mep (exp0, sigmoid)
1208
+ # init_vals : dict
1209
+ # Dictionary containing the initial values for each element as np.ndarray [len(elm_idx_list)].
1210
+ # The keys are the free parameters of fun, e.g. "x0", "amp", etc
1211
+ # limits : dict
1212
+ # Dictionary containing the limits of each parameter for each element e.g.: limits["x0"][elm_idx] = [min, max]
1213
+ # log_scale : bool, default: False
1214
+ # Log-transform data before fit (necessary for functions defined in the log domain)
1215
+ # constants : dict of <string>:<num>, default: None
1216
+ # key:value pair of model parameters not to optimize.
1217
+ # bad_elm_idx : np.ndarray
1218
+ # Indices of elements not to fit.
1219
+ # max_nfev : int, default: None
1220
+ # Max fits, passed to model.fit() as max_nfev=max_nfev*len(x).
1221
+ # verbose : bool, default: False
1222
+ # Print verbosity information
1223
+ #
1224
+ # Returns
1225
+ # -------
1226
+ # r2 : np.ndarray of float [n_roi, 1]
1227
+ # R2 for each element in elm_idx_list
1228
+ # fit : fit objects
1229
+ # Fit objects returned from the optimizers
1230
+ # """
1231
+ # # from matplotlib import pyplot as plt
1232
+ # str_pref = "Main"
1233
+ # start = time.time()
1234
+ # try:
1235
+ # str_pref = f"{multiprocessing.current_process()._identity[0]:0>2} "
1236
+ # except:
1237
+ # pass
1238
+ #
1239
+ # # use all stimuli if zap_idx is not provided
1240
+ # if zap_idx is not None:
1241
+ # zap_idx = np.arange(e_matrix.shape[0])
1242
+ # mask = np.zeros(e_matrix.shape[0]).astype(bool)
1243
+ # mask[zap_idx] = True
1244
+ # e_matrix = e_matrix[mask]
1245
+ # mep = mep[mask]
1246
+ # n_zaps = e_matrix.shape[0]
1247
+ #
1248
+ # # if verbose:
1249
+ # # print(f"Proc{str_pref}: > regression_nl_hdf5_workhorse: "
1250
+ # # f"starting ({e_matrix.shape[1]} elms / {n_zaps} zaps)")
1251
+ #
1252
+ # best_values = [0] * e_matrix.shape[1]
1253
+ # r2 = np.zeros((e_matrix.shape[1],)) - 10
1254
+ #
1255
+ # if fun == dummy_fun:
1256
+ # return r2, best_values
1257
+ #
1258
+ # if max_nfev is not None:
1259
+ # max_nfev = n_zaps * max_nfev
1260
+ #
1261
+ # if log_scale:
1262
+ # mep = np.log10(mep)
1263
+ #
1264
+ # # set up gmodel
1265
+ # gmodel = Model(fun)
1266
+ #
1267
+ # # create the param_hints ordereddict only once.
1268
+ # for p in gmodel.param_names:
1269
+ # gmodel.param_hints[p] = OrderedDict()
1270
+ #
1271
+ # # compute variance only once
1272
+ # var_qoi = np.var(mep)
1273
+ #
1274
+ # param_hints = gmodel.param_hints
1275
+ # make_params = gmodel.make_params
1276
+ # param_names = gmodel.param_names
1277
+ # if limits is not None:
1278
+ # limits_p = param_names
1279
+ # else:
1280
+ # limits_p = set()
1281
+ #
1282
+ # if init_vals is not None :
1283
+ # inits_p = list(init_vals.keys())
1284
+ # if constants is not None:
1285
+ # inits_p = np.setdiff1d(inits_p, list(constants.keys()))
1286
+ # else:
1287
+ # inits_p = set()
1288
+ #
1289
+ # if constants is not None:
1290
+ # for p in param_names:
1291
+ # if p in constants:
1292
+ # param_hints[p]['value'] = constants[p]
1293
+ # param_hints[p]['vary'] = False
1294
+ #
1295
+ # # # set_param_hint(p, value=constants[p], vary=False)
1296
+ # for i, elm in enumerate(elm_idx_list):
1297
+ # for p in limits_p:
1298
+ # # set limits
1299
+ # # skip the set_param_hint function and do this by hand
1300
+ # param_hints[p]['min'], param_hints[p]['max'] = limits[p][elm]
1301
+ #
1302
+ # for p in inits_p:
1303
+ # # set initial values
1304
+ # param_hints[p]['value'] = init_vals[p][elm]
1305
+ # # set_param_hint(p, value=init_vals[p][elm_idx])
1306
+ #
1307
+ # make_params()
1308
+ #
1309
+ # # perform fit (for some reason, sigmoid4_log function generates NaN, which I can not reproduce)
1310
+ # # I catch the fitting error (ValueError) and set an r2 score of -1, such that it will go into the refit
1311
+ #
1312
+ # try:
1313
+ # # set max_nfev to a reasonable range
1314
+ # fit = gmodel.fit(mep, x=e_matrix[:, i],
1315
+ # calc_covar=False, method="leastsq", max_nfev=max_nfev, scale_covar=False)
1316
+ # best_values[i] = fit.best_values
1317
+ #
1318
+ # # calculate R2 score
1319
+ # # times1.append(fit.nfev)
1320
+ # # mid2 = time.time()
1321
+ # if log_scale:
1322
+ # # this could be optimized by pow(10)
1323
+ # residual = 10 ** fit.best_fit - 10 ** qoi_amplitudes
1324
+ # r2[i] = 1 - np.var(residual) / np.var(10 ** qoi_amplitudes)
1325
+ #
1326
+ # else:
1327
+ # r2[i] = 1 - np.var(fit.residual) / var_qoi
1328
+ #
1329
+ # except ValueError:
1330
+ # print(f"value error: {i}: {elm}")
1331
+ # pass
1332
+ #
1333
+ # if verbose:
1334
+ # print(f"Proc{str_pref}: > regression_nl_hdf5_workhorse: done "
1335
+ # f"({time.time() - start:.2f} s, "
1336
+ # f"{(time.time() - start) / len(elm_idx_list):2.4} s/elm, "
1337
+ # f"mean R2: {np.mean(r2):2.2f})")
1338
+ # # f"Done at {time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())}, pid: {os.getpid()}")
1339
+ # return r2, np.array(best_values)
1340
+
1341
+
1342
+ def nl_hdf5(elm_idx_list=None, fn_reg_hdf5=None, qoi_path_hdf5=None, e_matrix=None, mep=None, zap_idx=None,
1343
+ fun=pynibs.expio.fit_funs.sigmoid4, n_cpu=4, con=None, n_refit=50, return_fits=False, score_type="R2",
1344
+ verbose=False, pool=None, refit_discontinuities=True):
1345
+ """
1346
+ Mass-univariate nonlinear regressions on raw MEP_{AMP} ~ E.
1347
+ That is, for each element in elm_idx_list, it's E (mag | norm | tan) for each zap regressed on the raw MEP
1348
+ amplitude. An element wise r2 score is returned.
1349
+ The function reads the precomputed array of E-MEP data from an .hdf5 file.
1350
+
1351
+ Parameters
1352
+ ----------
1353
+ elm_idx_list : np.ndarray of int, optional
1354
+ List containing the element indices the fit is performed for, if not all.
1355
+ fn_reg_hdf5 : str, optional
1356
+ Filename (incl. path) containing the precomputed E-MEP dataframes.
1357
+ qoi_path_hdf5: Union[str, list[str]], optional
1358
+ Path in .hdf5 file to dataset of electric field qoi e.g.: ["E", "E_norm", "E_tan"]
1359
+ e_matrix : np.ndarray of float, optional
1360
+ (n_zaps, n_ele) Electric field matrix.
1361
+ mep : np.ndarray of float
1362
+ (n_zaps) Motor evoked potentials for every stimulation.
1363
+ zap_idx : np.ndarray, optional
1364
+ (n_used_zaps) Indices of zaps the congruence factor is calculated with, if not all.
1365
+ fun : pynibs.exp.Mep function, default: sigmoid4
1366
+ A function of pynibs.exp.Mep (exp0, sigmoid).
1367
+ n_cpu : int, default: 4
1368
+ Number of threads to use.
1369
+ con : np.ndarray of float, optional
1370
+ (n_roi, 3 or 4) Connectivity matrix of ROI (needed in case of refit because of discontinuity check).
1371
+ n_refit : int, default: 50
1372
+ Maximum number of refits of zero elements. No refit is applied in case of n_refit = 0.
1373
+ return_fits : bool, default: False
1374
+ Return fit objects containing the parameter estimates.
1375
+ score_type : str, default: "R2"
1376
+ Error measure of fit:
1377
+
1378
+ * "R2": R2 score (Model variance / Total variance); linear fits: [0, 1], 1 ... perfect fit
1379
+ * "SR": Relative standard error of regression (1 - Error 2-norm / Data 2-norm); [-Inf, 1], 1 ... perfect fit
1380
+ verbose : bool, default: False
1381
+ Plot output messages.
1382
+ pool : multiprocessing.Pool(), optional
1383
+ Pool instance to use.
1384
+ refit_discontinuities : bool, default: True
1385
+ Run refit for discontinuous elements at the end.
1386
+
1387
+ Returns
1388
+ -------
1389
+ r2 : np.ndarray of float
1390
+ (n_roi, n_qoi) R2 for each element in elm_idx_list.
1391
+ """
1392
+ refit_thr = 1e-6
1393
+
1394
+ if elm_idx_list is None:
1395
+ elm_idx_list = np.arange(e_matrix.shape[1])
1396
+
1397
+ if fun == pynibs.expio.fit_funs.dummy_fun:
1398
+ c_all = np.random.random(len(elm_idx_list))
1399
+
1400
+ if return_fits:
1401
+ best_values = [{"a": 1} for _ in range(len(elm_idx_list))]
1402
+ return c_all, best_values
1403
+ else:
1404
+ return c_all
1405
+
1406
+ if qoi_path_hdf5 is None:
1407
+ qoi_path_hdf5 = ["e_matrix"]
1408
+
1409
+ if fn_reg_hdf5 is not None and qoi_path_hdf5 is not None:
1410
+ df_reg = pd.read_hdf(fn_reg_hdf5, qoi_path_hdf5) # type: pd.DataFrame
1411
+ e_matrix = df_reg.values[:, :-1]
1412
+ mep = df_reg.loc[:, "qoi_amplitudes"].values
1413
+
1414
+ elif e_matrix is None or mep is None:
1415
+ raise ValueError("Please provide e_matrix and mep or fn_reg_hdf5 and qoi_path_hdf5!")
1416
+
1417
+ if n_refit > 0 and con is None:
1418
+ raise ValueError("Please provide connectivity matrix (con) in case of refit!")
1419
+
1420
+ # shuffle elements because some of them need longer to compute
1421
+ # (in this way it is distributed more equally over all cores)
1422
+ np.random.shuffle(elm_idx_list)
1423
+ if not isinstance(elm_idx_list, np.ndarray):
1424
+ elm_idx_list = np.array(elm_idx_list)
1425
+ elm_idx_list_shuffle_idx = np.argsort(elm_idx_list).astype(int)
1426
+ elm_idx_list = elm_idx_list.tolist()
1427
+
1428
+ # Setting up parallelization
1429
+ if not pool:
1430
+ n_cpu_available = multiprocessing.cpu_count()
1431
+ n_cpu = min(n_cpu, n_cpu_available, len(elm_idx_list))
1432
+ pool = multiprocessing.Pool(n_cpu)
1433
+ local_pool = True
1434
+
1435
+ if verbose:
1436
+ print(" > Setting up multiprocessing using {}/{} cores".format(n_cpu, n_cpu_available))
1437
+ else:
1438
+ local_pool = False # close pool only if created locally
1439
+ if verbose:
1440
+ print(" > Using provided pool object")
1441
+ if n_cpu > 1:
1442
+ elm_idx_list_chunks = pynibs.compute_chunks(elm_idx_list, n_cpu)
1443
+ elif len(elm_idx_list) == 1:
1444
+ elm_idx_list_chunks = [elm_idx_list]
1445
+ elif n_cpu == 1:
1446
+ elm_idx_list_chunks = [elm_idx_list]
1447
+ else:
1448
+ raise ValueError(f'n_cpu={n_cpu} is invalid. ')
1449
+
1450
+ # setting up initial values and limits
1451
+ if verbose:
1452
+ print(f" > Setting up initial values and limits for {fun.__name__} function (from {fun.__module__})!")
1453
+
1454
+ log_scale, limits, init_vals, max_vals_refit = get_model_init_values(fun=fun,
1455
+ elm_idx_list=elm_idx_list,
1456
+ e_matrix=e_matrix,
1457
+ mep=mep,
1458
+ mask_e_field=None)
1459
+
1460
+ if verbose:
1461
+ print(" > Calculating congruence factor ...")
1462
+
1463
+ if type(qoi_path_hdf5) is not list:
1464
+ qoi_path_hdf5 = [qoi_path_hdf5]
1465
+
1466
+ c_all = np.zeros((len(elm_idx_list), len(qoi_path_hdf5)))
1467
+ best_values = None
1468
+ # loop over electric field QOIs
1469
+ for i_q, q in enumerate(qoi_path_hdf5):
1470
+ workhorse_partial = partial(fit_elms,
1471
+ e_matrix=e_matrix,
1472
+ mep=mep,
1473
+ fun=fun,
1474
+ zap_idx=zap_idx,
1475
+ init_vals=init_vals,
1476
+ limits=limits,
1477
+ log_scale=log_scale,
1478
+ max_nfev=10,
1479
+ score_type=score_type,
1480
+ verbose=verbose)
1481
+
1482
+ start = time.time()
1483
+ res = pool.map(workhorse_partial, elm_idx_list_chunks, chunksize=1)
1484
+ stop = time.time()
1485
+ if verbose:
1486
+ # print(len(elm_idx_list_chunks))
1487
+ print(f"Determine c-factors for {q} / {qoi_path_hdf5}: {stop - start:2.2f} s")
1488
+
1489
+ best_values = []
1490
+ c = None
1491
+ for i in range(len(res)):
1492
+ if i == 0:
1493
+ c = res[i][0]
1494
+ best_values = res[i][1]
1495
+ else:
1496
+ if c.ndim < 2:
1497
+ c = c[:, np.newaxis]
1498
+ if res[i][0].ndim < 2:
1499
+ c = np.vstack((c, res[i][0][:, np.newaxis]))
1500
+ else:
1501
+ c = np.vstack((c, res[i][0]))
1502
+ best_values += res[i][1]
1503
+
1504
+ # resort c values
1505
+ c = c[elm_idx_list_shuffle_idx]
1506
+ best_values = [best_values[i] for i in elm_idx_list_shuffle_idx]
1507
+ elm_idx_list = np.array(elm_idx_list)[elm_idx_list_shuffle_idx].astype(list)
1508
+
1509
+ # refit elements
1510
+ ####################################################################
1511
+ if n_refit > 0:
1512
+ params = inspect.getfullargspec(fun).args[1:]
1513
+
1514
+ # refit bad elements
1515
+ ####################################################################
1516
+ i_refit = 0
1517
+ while i_refit < n_refit:
1518
+ idx_refit = np.where(c < refit_thr)[0]
1519
+
1520
+ if len(idx_refit) > 0:
1521
+ if verbose:
1522
+ print(f" > Performing refit for {len(idx_refit)} zero elements ...")
1523
+
1524
+ # set random start values
1525
+ if len(idx_refit) > 0:
1526
+ for p in params:
1527
+ for idx_re in idx_refit:
1528
+ init_vals[p][idx_re] = max_vals_refit[p][idx_re] * np.random.rand()
1529
+ # init_vals[p][idx_re] = init_vals[p][idx_re] + \
1530
+ # max_vals_refit[p][idx_re] * (np.random.rand() - 0.5)
1531
+
1532
+ if n_cpu > 1:
1533
+ elm_idx_list_chunks_zero = pynibs.compute_chunks(np.array(elm_idx_list)[idx_refit].tolist(),
1534
+ n_cpu)
1535
+ elif len(elm_idx_list[idx_refit]) == 1:
1536
+ elm_idx_list_chunks_zero = [elm_idx_list[idx_refit]]
1537
+ else: # n_cpu == 1:
1538
+ elm_idx_list_chunks_zero = [elm_idx_list[idx_refit]]
1539
+
1540
+ workhorse_partial = partial(fit_elms,
1541
+ e_matrix=e_matrix,
1542
+ mep=mep,
1543
+ fun=fun,
1544
+ zap_idx=zap_idx,
1545
+ init_vals=init_vals,
1546
+ limits=limits,
1547
+ log_scale=log_scale,
1548
+ max_nfev=100,
1549
+ score_type=score_type,
1550
+ verbose=verbose)
1551
+ start = time.time()
1552
+ res_refit = pool.map(workhorse_partial, elm_idx_list_chunks_zero)
1553
+ stop = time.time()
1554
+
1555
+ if verbose:
1556
+ print(f"Determine c-factors (refit) for {q} / {qoi_path_hdf5}: {stop - start} s")
1557
+
1558
+ best_values_refit = []
1559
+ c_refit = None
1560
+ for i in range(len(res_refit)):
1561
+ if i == 0:
1562
+ c_refit = res_refit[i][0]
1563
+ best_values_refit = res_refit[i][1]
1564
+ else:
1565
+ if c_refit.ndim < 2:
1566
+ c_refit = c_refit[:, np.newaxis]
1567
+ if res_refit[i][0].ndim < 2:
1568
+ c_refit = np.vstack((c_refit, res_refit[i][0][:, np.newaxis]))
1569
+ else:
1570
+ c_refit = np.vstack((c_refit, res_refit[i][0]))
1571
+ best_values_refit += res_refit[i][1]
1572
+
1573
+ # overwrite old values with refitted ones if r2/sr score was higher,
1574
+ # keep old value otherwise
1575
+ for i_c_re, c_re in enumerate(c_refit):
1576
+ if c_re < c[idx_refit[i_c_re]]:
1577
+ c[idx_refit[i_c_re]] = c_re
1578
+ best_values[idx_refit[i_c_re]] = best_values_refit[i_c_re]
1579
+
1580
+ i_refit += 1
1581
+ else:
1582
+ break
1583
+
1584
+ # find discontinuities and refit
1585
+ ##################################################################
1586
+ if refit_discontinuities and len(c) > 1:
1587
+ idx_disc, idx_neighbor = pynibs.get_indices_discontinuous_data(data=c,
1588
+ con=con,
1589
+ neighbor=True,
1590
+ deviation_factor=2)
1591
+ idx_disc = np.array(idx_disc)
1592
+
1593
+ if len(idx_disc) > 0:
1594
+ if verbose:
1595
+ print(f" > Performing refit for {len(idx_disc)} discontinuous elements ...")
1596
+
1597
+ # refit for discontinuous elements
1598
+ if len(idx_disc) > 0:
1599
+ # set start values from neighbors
1600
+ for p in params:
1601
+ for idx_re, idx_ne in zip(idx_disc, idx_neighbor):
1602
+ init_vals[p][idx_re] = best_values[idx_ne][p]
1603
+
1604
+ if n_cpu > 1:
1605
+ elm_idx_list_chunks_disc = pynibs.compute_chunks(np.array(elm_idx_list)[idx_disc].tolist(),
1606
+ n_cpu)
1607
+ elif len(elm_idx_list[idx_disc]) == 1:
1608
+ elm_idx_list_chunks_disc = [elm_idx_list[idx_disc]]
1609
+ else: # n_cpu == 1:
1610
+ elm_idx_list_chunks_disc = [elm_idx_list[idx_disc]]
1611
+
1612
+ start = time.time()
1613
+ res_refit = pool.map(workhorse_partial, elm_idx_list_chunks_disc)
1614
+ stop = time.time()
1615
+
1616
+ if verbose:
1617
+ print(f"Determined c-factors (discontinuous refit) for "
1618
+ f"{q} / {qoi_path_hdf5}: {stop - start} s")
1619
+
1620
+ best_values_refit = []
1621
+ c_refit = None
1622
+ for i in range(len(res_refit)):
1623
+ if i == 0:
1624
+ c_refit = res_refit[i][0]
1625
+ best_values_refit = res_refit[i][1]
1626
+ else:
1627
+ if c_refit.ndim < 2:
1628
+ c_refit = c_refit[:, np.newaxis]
1629
+ if res_refit[i][0].ndim < 2:
1630
+ c_refit = np.vstack((c_refit, res_refit[i][0][:, np.newaxis]))
1631
+ else:
1632
+ c_refit = np.vstack((c_refit, res_refit[i][0]))
1633
+ best_values_refit += res_refit[i][1]
1634
+
1635
+ c[idx_disc] = c_refit
1636
+
1637
+ for j, i in enumerate(idx_disc):
1638
+ best_values[i] = best_values_refit[j]
1639
+
1640
+ c_all[:, i_q] = c.flatten()
1641
+
1642
+ if local_pool:
1643
+ pool.close()
1644
+ pool.join()
1645
+
1646
+ if return_fits:
1647
+ return c_all, best_values
1648
+ else:
1649
+ return c_all
1650
+
1651
+
1652
def get_model_init_values(fun, elm_idx_list, e_matrix, mep, mask_e_field=None,
                          rem_empty_hints=True):
    """
    Calc appropriate init, limit, and max values for model fits depending on the data.
    If negative and positive x-data is present (e.g. for normal component values), values are set
    according to the side (positive or negative) where more values are present. When more positive
    x-axis values are present, negative x-axis values will be ignored. When more negative x-axis
    values are present, the absolute values will be taken and the positive values are ignored.
    Only parameters for sigmoid* are optimized.

    Parameters
    ----------
    fun : pynibs.expio.fit_funs function
        IO curve function object (linear, exp0, sigmoid*, dummy_fun).
    elm_idx_list : np.ndarray of int
        (n_used_elms) Array containing the element indices the fit is performed for.
    e_matrix : np.ndarray of float
        (n_zaps, n_ele) Electric field matrix.
    mep : np.ndarray of float
        (n_zaps) Motor evoked potentials for every stimulation.
    mask_e_field : np.ndarray of bool, optional
        (n_zaps, n_ele) Mask indicating for which e-field (and mep) values the fit is performed for.
        Changes for normal component in each element because of the sign and p-values.
        If None, all data is used in each element.
    rem_empty_hints : bool, default: True
        Remove any non-filled param hints from limits dict.

    Returns
    -------
    log_scale : bool
        Log scale.
    limits : dict of list
        (n elm_index_list) Element-wise limit values for function fitting.
    init_vals : dict of list
        (n elm_index_list) Element-wise init values for function fitting.
    max_vals_refit : dict of list
        (n elm_index_list) Element-wise perturbation range for refitting function.
    """
    init_vals = dict()
    max_vals_refit = dict()
    limits = dict()

    # no mask given -> use all zaps for every element
    if mask_e_field is None:
        mask_e_field = np.ones(e_matrix.shape).astype(bool)

    # get function-specific argument names (first argument is the e-field itself)
    params = inspect.getfullargspec(fun).args[1:]
    for p in params:
        init_vals[p] = []
        max_vals_refit[p] = []
        limits[p] = []

    if fun == pynibs.expio.fit_funs.linear:
        # linear function starts with generic and same values for each element
        for _ in range(len(elm_idx_list)):
            limits["m"].append([-100, 100])
            limits["n"].append([-100, 100])

            init_vals["m"].append(0.3)
            init_vals["n"].append(-1)

            max_vals_refit["m"].append(100)
            max_vals_refit["n"].append(.3)

    elif fun == pynibs.expio.fit_funs.exp0:
        # exponential function also uses generic, element-independent hints
        for _ in range(len(elm_idx_list)):
            limits["x0"].append([0, 1000])
            limits["r"].append([1e-12, 100])

            init_vals["x0"].append(1)
            init_vals["r"].append(.1)

            max_vals_refit["x0"].append(10)
            max_vals_refit["r"].append(.2)

    elif fun in (pynibs.expio.fit_funs.sigmoid, pynibs.expio.fit_funs.sigmoid_log,
                 pynibs.expio.fit_funs.sigmoid4, pynibs.expio.fit_funs.sigmoid4_log):
        # sigmoid4* additionally fits a y-offset parameter "y0"; everything else is shared
        has_y0 = fun in (pynibs.expio.fit_funs.sigmoid4, pynibs.expio.fit_funs.sigmoid4_log)

        for elm in elm_idx_list:
            # element-wise e-field magnitudes of the zaps selected by the mask
            e_elm = np.abs(e_matrix[mask_e_field[:, elm], elm])

            e_min = np.min(e_elm)
            e_max = np.max(e_elm)

            # x0 first guess
            # NOTE(review): original comment said "center between e_min, e_max", but the formula
            # is e_min + e_max/2, not (e_min + e_max)/2 — kept as-is, confirm intent.
            x_0 = e_min + np.max(e_elm) / 2
            init_vals["x0"].append(x_0)

            # amplitude first guess: largest MEP observed
            amp = np.max(mep)
            init_vals["amp"].append(amp)

            # r first guess: slope from min to max MEP over the observed e-field range
            r = (np.max(mep) - np.min(mep)) / (e_max - e_min)
            init_vals["r"].append(r)

            max_vals_refit["x0"].append(3 * x_0)
            max_vals_refit["amp"].append(3 * amp)
            max_vals_refit["r"].append(3 * r)

            if has_y0:
                # y offset first guess: smallest MEP observed
                init_vals["y0"].append(np.min(mep))
                max_vals_refit["y0"].append(np.min(mep) * 3)

        # set upper bound of limits in relation to init_vals
        factor = 100
        limits["x0"] = [[0, init_vals["x0"][i] * 3] for i in range(len(elm_idx_list))]
        limits["amp"] = [[1e-12, init_vals["amp"][i] * 3] for i in range(len(elm_idx_list))]
        limits["r"] = [[1e-12, init_vals["r"][i] * factor] for i in range(len(elm_idx_list))]
        if has_y0:
            # element-wise y0 limits (previously derived from element 0 only, which also
            # crashed for an empty element list)
            limits["y0"] = [[np.abs(init_vals["y0"][i]) * -2, np.abs(init_vals["y0"][i]) * 2]
                            for i in range(len(elm_idx_list))]

    elif fun == pynibs.expio.fit_funs.dummy_fun:
        # trivial hints for test runs
        for _ in range(len(elm_idx_list)):
            limits["a"].append([0, 1])
            init_vals["a"].append(1)
            max_vals_refit["a"].append(1)
    else:
        raise NotImplementedError(fun)

    # *_log variants are fitted in log space
    log_scale = fun in (pynibs.expio.fit_funs.sigmoid_log, pynibs.expio.fit_funs.sigmoid4_log)

    if rem_empty_hints:
        # remove params hints for params that are not taken care of here
        keys_2_remove = [k for k, v in limits.items() if not v]
        for k in keys_2_remove:
            del limits[k]
            del init_vals[k]
            del max_vals_refit[k]

    return log_scale, limits, init_vals, max_vals_refit
1823
+
1824
+
1825
def nl_hdf5_single_core_write(i, elm_idx_list, fn_reg_hdf5=None, qoi_path_hdf5=None, e_matrix=None,
                              mep=None,
                              fun=pynibs.expio.fit_funs.sigmoid4,
                              con=None, n_refit=50, return_fits=False, constants=None, verbose=False,
                              seed=None, stepdown=False, score_type='R2', return_progress=False, geo=None):
    """
    Perform single-core processing for non-linear optimization and write results to an HDF5 file.

    .. note:: This worker reads ``z`` (list of zap-index subsets), ``fn`` (output .hdf5 filename)
        and ``lock`` (a multiprocessing lock) which are not defined in this function — presumably
        they are installed as globals by a multiprocessing pool initializer. TODO confirm.

    Parameters
    ----------
    i : int
        The index of the subset of data to process.
    elm_idx_list : list of int
        List of element indices.
    fn_reg_hdf5 : str, optional
        Path to the registration HDF5 file.
    qoi_path_hdf5 : str, optional
        Path to the HDF5 file containing the quantity of interest (QOI) data.
    e_matrix : np.ndarray, optional
        The electromagnetic forward matrix.
    mep : pandas.DataFrame, optional
        The motor evoked potential (MEP) data.
    fun : function, default: sigmoid4
        The non-linear optimization function to use (default: pynibs.sigmoid4).
    con : object, optional
        Constraints for optimization, if applicable.
    n_refit : int, default: 50
        Number of refitting iterations.
    return_fits : bool, default: False
        If True, return fits alongside the coefficients.
    constants : dict, optional
        Constants used in the optimization function.
    verbose : bool, default: False
        If True, print verbose messages.
    seed : int, optional
        The random seed for optimization.
    stepdown : bool, default: False
        If True, use a stepdown approach for optimization.
    score_type : str, default: 'R2'
        The type of score to use for optimization.
    return_progress : bool, default: False
        If True, return progress data.
    geo : object, optional
        Geometry data.

    Returns
    -------
    dict
        A dictionary containing the following elements:

        * 'progress_data': Progress data if 'return_progress' is True.
        * 'best_values': Best optimization values if 'return_fits' is True.
    """
    # get string prefix for useful multiprocessing logging
    str_pref = "Main"
    try:
        # worker processes have a non-empty _identity tuple; the main process raises IndexError here
        str_pref = f"{multiprocessing.current_process()._identity[0]:0>2} "
    except Exception:
        pass

    # compute c
    if stepdown:
        # stepdown gets only the e-field rows / MEPs of this zap subset
        res = stepdown_approach(zap_idx=z[i],
                                elm_idx_list=elm_idx_list,
                                fn_reg_hdf5=fn_reg_hdf5,
                                qoi_path_hdf5=qoi_path_hdf5,
                                e_matrix=e_matrix[z[i]],
                                mep=mep.iloc[z[i]],
                                fun=fun,
                                con=con,
                                n_refit=n_refit, return_fits=return_fits,
                                constants=constants,
                                verbose=verbose, seed=seed,
                                score_type=score_type,
                                return_progress=return_progress,
                                geo=geo)
    else:
        # the non-stepdown path passes the full data and lets the callee subset via zap_idx
        res = nl_hdf5_single_core(zap_idx=z[i],
                                  elm_idx_list=elm_idx_list,
                                  fn_reg_hdf5=fn_reg_hdf5,
                                  qoi_path_hdf5=qoi_path_hdf5,
                                  e_matrix=e_matrix,
                                  mep=mep,
                                  fun=fun,
                                  con=con,
                                  n_refit=n_refit, return_fits=return_fits, constants=constants,
                                  verbose=verbose, seed=seed)

    # NOTE(review): dict-style access assumes the callees return a dict; nl_hdf5_single_core as
    # defined in this file returns a tuple — verify which callee variant is actually in use.
    c = res['c']
    best_values = res['best_values'] if 'best_values' in res else None
    progress_data = res['progress'] if 'progress' in res else {}
    stats = res['stats'] if 'stats' in res else {}

    # write to hdf5, threadsafe
    if verbose:
        print(f"{str_pref}: > Writing results for {len(z[i])} zaps to {fn}.")
    with lock:
        with h5py.File(fn, 'a') as f:
            # datasets are keyed by the number of zaps in this subset
            f.create_dataset(f'c/{len(z[i])}', data=c)
            for key in stats:
                stat = stats[key]
                if not isinstance(stat, np.ndarray):
                    # h5py needs array-like data for scalar stats
                    stat = np.array([stat])
                f.create_dataset(f'{key}/{len(z[i])}', data=stat)

    if verbose:
        print(f"{str_pref}: > Writing results for {len(z[i])} zaps done.")

    return {'progress_data': progress_data,
            'best_values': best_values}
1935
+
1936
+
1937
def get_bad_elms(x, y, method="lstsq", verbose=False):
    """
    Element-wise fast linear regression fit to identify bad elements.
    "Bad" is defined here as a negative slope of the regression line.

    Parameters
    ----------
    x : np.ndarray of float
        (n_zaps, n_ele) Electric field matrix.
    y : np.ndarray of float
        (n_zaps) Motor evoked potentials for every stimulation.
    method : str, default: "lstsq"
        Which method to use. (numpy.linalg.)lstsq, (scipy.stats.)linregress, or pinv.
    verbose : bool, default: False
        Indicating verbosity of messages.

    Returns
    -------
    idx : list of int
        Column indices of bad elements.
    """
    # get string prefix for useful multiprocessing logging
    str_pref = "Main"
    try:
        # worker processes have a non-empty _identity tuple; the main process raises IndexError here.
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt are not swallowed.
        str_pref = f"{multiprocessing.current_process()._identity[0]:0>2} "
    except Exception:
        pass

    start = time.time()
    if method == "linregress":
        idx = [i for i in range(x.shape[1]) if
               linregress(x[:, i], y).slope < 0]
    elif method == "lstsq":
        # fit y = m*x + n per column via least squares and keep columns with m < 0
        ones = np.ones(x.shape[0])  # intercept column, hoisted out of the loop
        idx = [i for i in range(x.shape[1]) if
               lstsq(np.vstack([x[:, i], ones]).T, y, rcond=None)[0][0] < 0]
    elif method == 'pinv':
        warnings.warn("pinv method is untested.")
        # slope of a no-intercept fit via the Moore-Penrose pseudoinverse
        idx = [i for i in range(x.shape[1]) if
               pinv(x[:, i].reshape(x.shape[0], 1)).dot(y)[0] < 0]
    else:
        raise NotImplementedError(f'Method {method} unknown.')

    if verbose:
        print(
            f"Proc{str_pref}: > {len(idx)}/{x.shape[1]} bad elms removed from fitting ({time.time() - start:2.4} s).")

    return idx
1980
+
1981
+
1982
def nl_hdf5_single_core(zap_idx, elm_idx_list, fn_reg_hdf5=None, qoi_path_hdf5=None, e_matrix=None, mep=None,
                        fun=pynibs.expio.fit_funs.sigmoid4,
                        con=None, n_refit=50, return_fits=False, constants=None, verbose=False, seed=None,
                        rem_bad_elms=True, return_e_field_stats=True):
    """
    Mass-univariate nonlinear regressions on raw MEP_{AMP} ~ E.
    That is, for each element in elm_idx_list, it's E (mag | norm | tan) for each zap regressed on the raw MEP
    amplitude. An element wise r2 score is returned.
    The function reads the precomputed array of E-MEP data from an .hdf5 file.

    Parameters
    ----------
    zap_idx : np.ndarray of int
        (n_zaps_used) Indices of zaps the congruence factor is calculated with.
    elm_idx_list : np.ndarray of int
        List containing the element indices the fit is performed for.
    fn_reg_hdf5 : str, optional
        Filename (incl. path) containing the precomputed E-MEP dataframes.
    qoi_path_hdf5 : str or list of str, optional
        Path in .hdf5 file to dataset of electric field qoi e.g.: ["E", "E_norm", "E_tan"].
    e_matrix : np.ndarray of float, optional
        (n_zaps, n_ele) Electric field matrix.
    mep : np.ndarray of float, optional
        (n_zaps) Motor evoked potentials for every stimulation.
        NOTE(review): when ``rem_bad_elms=True`` this is accessed with ``.iloc`` — presumably a
        pandas Series in practice, not an ndarray; confirm against callers.
    fun : function object, default: sigmoid4
        A function of pynibs.exp.Mep (exp0, sigmoid).
    con : np.ndarray of float, optional
        (n_roi, 3 or 4) Connectivity matrix of ROI (needed in case of refit because of discontinuity check).
    n_refit : int, default: 50
        Maximum number of refits of zero elements. No refit is applied in case of n_refit = 0.
    return_fits : bool, default: False
        Return fit objects containing the parameter estimates.
    constants : dict of <string>:<num>, optional
        key:value pair of model parameters not to optimize.
    verbose : bool, default: False
        Plot output messages.
    seed : int, optional
        Seed to use.
    rem_bad_elms : bool, default: True
        Remove elements based on a fast linear regression slope estimation.
    return_e_field_stats : bool, default: True
        Return some stats on the efield variance.

    Returns
    -------
    ret : tuple
        (c_all,) extended depending on the flags:

        * c_all : np.ndarray of float
            (n_roi, n_qoi) Score for each element in elm_idx_list.
        * best_values : list/array of dict
            Fit parameter estimates per element (only if ``return_fits=True``).
        * e_stats_dicts : dict
            'mc' (mutual coherence) and 'sv_rat' (singular value ratio)
            (only if ``return_e_field_stats=True``).
    """
    starttime = time.time()
    # get string prefix for useful multiprocessing logging
    str_pref = "Main"
    try:
        # worker processes have a non-empty _identity tuple; the main process raises IndexError here
        str_pref = f"{multiprocessing.current_process()._identity[0]:0>2} "
    except Exception:
        pass
    best_values = None

    if qoi_path_hdf5 is None:
        qoi_path_hdf5 = ["e_matrix"]

    if fn_reg_hdf5 is not None and qoi_path_hdf5 is not None:
        # load the precomputed E-MEP dataframe; last column is the MEP amplitudes
        df_reg = pd.read_hdf(fn_reg_hdf5, qoi_path_hdf5)
        e_matrix = df_reg.values[:, :-1]
        mep = df_reg.loc[:, "qoi_amplitudes"].values

    elif e_matrix is None or mep is None:
        raise ValueError(f"Proc{str_pref}: Please provide e_matrix and mep or fn_reg_hdf5 and qoi_path_hdf5!")

    if n_refit > 0 and con is None:
        raise ValueError(f"Proc{str_pref}: Please provide connectivity matrix (con) in case of refit!")

    # shuffle elements because some of them need longer to compute
    # (in this way it is distributed more equally over all cores)
    if seed:
        np.random.seed(seed)
    np.random.shuffle(elm_idx_list)
    if not isinstance(elm_idx_list, np.ndarray):
        elm_idx_list = np.array(elm_idx_list)
    # permutation that restores the original (sorted) element order after fitting
    elm_idx_list_shuffle_idx = np.argsort(elm_idx_list)

    # setting up initial values and limits
    if verbose:
        print(
            f"Proc{str_pref}: > Setting up initial values and limits "
            f"for {fun.__name__} function (from {fun.__module__}).")

    log_scale, limits, init_vals, max_vals_refit = get_model_init_values(fun,
                                                                         elm_idx_list,
                                                                         e_matrix,
                                                                         mep)

    if verbose:
        print(f"Proc{str_pref}: > c-map for {len(zap_idx)}: starting.")

    n_elm = len(elm_idx_list)
    if type(qoi_path_hdf5) is not list:
        qoi_path_hdf5 = [qoi_path_hdf5]

    c_all = np.zeros((len(elm_idx_list), len(qoi_path_hdf5)))

    # exclude elements whose fast linear fit has a negative slope
    if rem_bad_elms:
        bad_elm_idx = get_bad_elms(e_matrix[zap_idx], mep.iloc[zap_idx].values, method='lstsq', verbose=True)
    else:
        bad_elm_idx = np.empty((0,))

    # get e field stats
    e_stats_dicts = None
    if return_e_field_stats:
        mc = pynibs.mutual_coherence(e_matrix.transpose())
        _, sv_rat, _ = svd(e_matrix)
        # condition-number-like ratio of largest to smallest singular value
        sv_rat = np.max(sv_rat) / np.min(sv_rat)
        e_stats_dicts = {'mc': mc,
                         'sv_rat': sv_rat}

    # fast return for testruns
    if fun == pynibs.expio.fit_funs.dummy_fun:
        c_all = np.random.random(n_elm)
        ret = (c_all,)
        if return_fits:
            best_values = [{"a": 1} for _ in range(n_elm)]
            ret += (best_values,)
        if return_e_field_stats:
            ret += (e_stats_dicts,)
        return ret

    # loop over electric field QOIs
    for i_q, q in enumerate(qoi_path_hdf5):

        start = time.time()
        res = fit_elms(
            elm_idx_list=elm_idx_list,
            e_matrix=e_matrix,
            mep=mep,
            fun=fun,
            zap_idx=zap_idx,
            init_vals=init_vals,
            limits=limits,
            log_scale=log_scale,
            constants=constants,
            max_nfev=10,
            verbose=verbose,
            bad_elm_idx=bad_elm_idx)
        stop = time.time()

        if verbose:
            print(f"Proc{str_pref}: > Determine c-factors done. ({stop - start:2.2f} s)")

        c = res[0]
        best_values = res[1]

        # resort c values back to the original element order
        c = c[elm_idx_list_shuffle_idx]
        best_values = np.array([best_values[i] for i in elm_idx_list_shuffle_idx])
        elm_idx_list = elm_idx_list[elm_idx_list_shuffle_idx]

        # refit elements
        ####################################################################
        if n_refit > 0:
            params = inspect.getfullargspec(fun).args[1:]

            # refit zero elements
            ####################################################################
            i_refit_zero = 0
            while i_refit_zero < n_refit:
                idx_zero = np.where(c < 1e-6)[0]
                idx_zero = np.setdiff1d(idx_zero, bad_elm_idx)

                # nothing left to refit -> stop early instead of spinning up to n_refit
                if len(idx_zero) == 0:
                    break

                if verbose:
                    print(f"Proc{str_pref}: > Zero refit {i_refit_zero}/{n_refit} "
                          f"({len(idx_zero)} elements): starting.")

                # set random start values within the element-wise perturbation range
                for p in params:
                    for idx_ze in idx_zero:
                        init_vals[p][idx_ze] = max_vals_refit[p][idx_ze] * np.random.rand()

                start = time.time()
                c_refit, best_values_refit = fit_elms(
                    elm_idx_list=elm_idx_list[idx_zero],
                    e_matrix=e_matrix,
                    mep=mep,
                    fun=fun,
                    zap_idx=zap_idx,
                    init_vals=init_vals,
                    limits=limits,
                    log_scale=log_scale,
                    max_nfev=10,
                    verbose=verbose)
                stop = time.time()

                if verbose:
                    print(
                        f"Proc{str_pref}: > Zero refit {i_refit_zero}/{n_refit} "
                        f"({len(idx_zero)} elements): done. "
                        f"({stop - start:.2f} s, "
                        f"{(stop - start) / len(idx_zero):.2f} / elm, "
                        f"{np.sum(c_refit > 1e-6)} > 0 )")

                # overwrite old values with refitted ones if r2 score was higher, keep old value otherwise
                for idx_c_ref, c_ref in enumerate(c_refit):
                    if c_ref > c[idx_zero[idx_c_ref]]:
                        c[idx_zero[idx_c_ref]] = c_ref
                        best_values[idx_zero[idx_c_ref]] = best_values_refit[idx_c_ref]

                i_refit_zero += 1

            # find discontinuities and refit
            idx_disc, idx_neighbor = pynibs.get_indices_discontinuous_data(data=c, con=con, neighbor=True,
                                                                           deviation_factor=2, min_val=1e-12,
                                                                           not_fitted_elms=bad_elm_idx)
            idx_disc = np.setdiff1d(idx_disc, bad_elm_idx)

            if len(idx_disc) > 0:

                if verbose:
                    print(f"Proc{str_pref}: > Discontinuous refit ({len(idx_disc)} elements): starting.")

                # set start values from neighbors
                for p in params:
                    for idx_re, idx_ne in zip(idx_disc, idx_neighbor):
                        init_vals[p][idx_re] = best_values[idx_ne][p]

                start = time.time()
                c_refit, best_values_refit = fit_elms(
                    elm_idx_list=elm_idx_list[idx_disc],
                    e_matrix=e_matrix,
                    mep=mep,
                    fun=fun,
                    zap_idx=zap_idx,
                    init_vals=init_vals,
                    limits=limits,
                    log_scale=log_scale,
                    max_nfev=10,
                    verbose=verbose)

                stop = time.time()

                if verbose:
                    print(
                        f"Proc{str_pref}: > Discontinuous refit ({len(idx_disc)} elements): "
                        f"done ({stop - start:.2f} s)")

                # overwrite old values with refitted ones
                c[idx_disc] = c_refit
                best_values[idx_disc] = best_values_refit

        c_all[:, i_q] = c.flatten()

    endtime = time.time()
    print(f"Proc{str_pref}: > c-map for {len(zap_idx)} zaps done ({endtime - starttime:2.2f}s). ")

    ret = (c_all,)
    if return_fits:
        # Fixed: previously best_values was overwritten here with dummy [{"a": 1}, ...] entries
        # (a copy-paste of the dummy_fun fast-return above), discarding the actual fit results.
        ret += (best_values,)
    if return_e_field_stats:
        ret += (e_stats_dicts,)
    return ret
2243
+
2244
+
2245
+ def stepdown_approach(zap_idx, elm_idx_list, fn_reg_hdf5=None, qoi_path_hdf5=None, e_matrix=None,
2246
+ mep=None,
2247
+ fun=pynibs.expio.fit_funs.sigmoid4,
2248
+ con=None, n_refit=50, return_fits=False, constants=None, verbose=False,
2249
+ seed=None,
2250
+ rem_bad_elms=True, return_e_field_stats=True,
2251
+ score_type='R2', return_progress=False, smooth_data=True, geo=None):
2252
+ """
2253
+ Mass-univariate nonlinear regressions on raw MEP_{AMP} ~ E in a stepdown manner to speed up computation.
2254
+
2255
+ Initially, one set of fits is done for the complete dataset. Afterwards, the best 1% of the elements are used
2256
+ as initial fitting parameters for their neighboring elements. Then, neighboring elements are fitted accordingly
2257
+ and iteratively.
2258
+ Finally, discontinuous elements are refitted until a smooth map is found or n_refit is hit.
2259
+ Can be sped up with rem_bad_elms that computes a fast linear fit to identify elements with a negative slope.
2260
+ The function reads the precomputed array of E-MEP data from an .hdf5 file.
2261
+
2262
+ Parameters
2263
+ ----------
2264
+ elm_idx_list : np.ndarray of int
2265
+ List containing the element indices the fit is performed for.
2266
+ fn_reg_hdf5 : str
2267
+ Filename (incl. path) containing the precomputed E-MEP dataframes.
2268
+ qoi_path_hdf5: str or list of str, optional
2269
+ Path in .hdf5 file to dataset of electric field qoi e.g.: ["E", "E_norm", "E_tan"].
2270
+ e_matrix : np.ndarray of float
2271
+ (n_zaps, n_ele) Electric field matrix.
2272
+ mep : np.ndarray of float
2273
+ (n_zaps) Motor evoked potentials for every stimulation.
2274
+ zap_idx : np.array, optional
2275
+ (n_zaps) Indices of zaps the congruence factor is calculated with (default: all).
2276
+ fun : function object
2277
+ A function of pynibs.exp.Mep (exp0, sigmoid).
2278
+ con : np.ndarray of float, optional
2279
+ (n_elm_roi, 3 or 4) Connectivity matrix of ROI (needed in case of refit because of discontinuity check)
2280
+ n_refit : int, default: 50
2281
+ Maximum number of refits of zero elements. No refit is applied in case of n_refit = 0.
2282
+ return_fits : bool, default: False
2283
+ Return fit objects containing the parameter estimates
2284
+ constants : dict of <string>:<num>, optional
2285
+ key:value pair of model parameters not to optimize.
2286
+ verbose : bool, default: False
2287
+ Plot output messages.
2288
+ seed: int, optional
2289
+ Seed to use.
2290
+ rem_bad_elms: bool, default: True
2291
+ Remove elements based on a fast linear regression slope estimation.
2292
+ return_e_field_stats : bool, default: True
2293
+ Return some stats on the efield variance
2294
+ score_type : str, default: "R2"
2295
+ Error measure of fit:
2296
+
2297
+ * "R2": R2 score (Model variance / Total variance); linear fits: [0, 1], 1 ... perfect fit
2298
+ * "SR": Relative standard error of regression (1 - Error 2-norm / Data 2-norm); [-Inf, 1], 1 ... perfect fit
2299
+ * "rho": Spearman correlation coefficient [-1, 1]; finds any monotonous correlation (0 means no correlation)
2300
+ return_progress : bool, default: False
2301
+ Return c maps for all steps to allow visualization over e-fitting over timesteps.
2302
+ smooth_data : bool, default: False
2303
+ Smooth c-map as final step.
2304
+ geo : object, optional
2305
+ Geometry data.
2306
+
2307
+ Returns
2308
+ -------
2309
+ dict:
2310
+
2311
+ * r2 : np.ndarray of float
2312
+ (n_roi, n_qoi) R2 for each element in elm_idx_list.
2313
+ * best_values: list of dict
2314
+ Fit information, if wanted.
2315
+ * stats : dict
2316
+ If wanted:
2317
+ 'mc': float
2318
+ Mutual coherence for e fields.
2319
+ 'sv_rat' : float
2320
+ SVD singular value ratio.
2321
+ * progress : cmaps for each step.
2322
+ """
2323
+ starttime = time.time()
2324
+
2325
+ # get string prefix for useful multiprocessing logging
2326
+ str_pref = "Main"
2327
+ try:
2328
+ str_pref = f"{multiprocessing.current_process()._identity[0]:0>2} "
2329
+ except:
2330
+ pass
2331
+
2332
+ if qoi_path_hdf5 is None:
2333
+ qoi_path_hdf5 = ["e_matrix"]
2334
+
2335
+ if fn_reg_hdf5 is not None and qoi_path_hdf5 is not None:
2336
+ df_reg = pd.read_hdf(fn_reg_hdf5, qoi_path_hdf5)
2337
+ e_matrix = df_reg.values[:, :-1]
2338
+ mep = df_reg.loc[:, "qoi_amplitudes"].values
2339
+
2340
+ elif e_matrix is None or mep is None:
2341
+ raise ValueError(f"Proc{str_pref}: Please provide e_matrix and mep or fn_reg_hdf5 and qoi_path_hdf5!")
2342
+
2343
+ if n_refit > 0 and con is None:
2344
+ raise ValueError(f"Proc{str_pref}: Please provide connectivity matrix (con) in case of refit!")
2345
+
2346
+ if not isinstance(elm_idx_list, np.ndarray):
2347
+ elm_idx_list = np.array(elm_idx_list)
2348
+
2349
+ # setting up initial values and limits
2350
+ if verbose:
2351
+ print(
2352
+ f"Proc{str_pref}: > Setting up initial values and limits "
2353
+ f"for {fun.__name__} function (from {fun.__module__}).")
2354
+
2355
+ log_scale, limits, init_vals, max_vals_refit = get_model_init_values(fun,
2356
+ elm_idx_list,
2357
+ e_matrix,
2358
+ mep)
2359
+ # convert dict-of-lists to dict-of-dicts
2360
+
2361
+ init_vals = pd.DataFrame().from_dict(init_vals)
2362
+ limits = pd.DataFrame().from_dict(limits)
2363
+
2364
+ n_elm = len(elm_idx_list)
2365
+ if type(qoi_path_hdf5) is not list:
2366
+ qoi_path_hdf5 = [qoi_path_hdf5]
2367
+ c_all = np.zeros((n_elm, len(qoi_path_hdf5)))
2368
+
2369
+ if e_matrix.shape[0] != len(zap_idx):
2370
+ e_matrix = e_matrix[zap_idx]
2371
+ mep = mep[zap_idx]
2372
+
2373
+ # get bad elms by checking their linear slope fit
2374
+ if rem_bad_elms:
2375
+ bad_elm_idx = set(get_bad_elms(e_matrix[:, elm_idx_list], mep.values, method='lstsq', verbose=verbose))
2376
+ else:
2377
+ bad_elm_idx = set()
2378
+
2379
+ # get e field stats
2380
+ e_stats_dicts = None
2381
+ if return_e_field_stats:
2382
+ stats_start = time.time()
2383
+ mc = pynibs.mutual_coherence(e_matrix.transpose())
2384
+
2385
+ sv_rat = svd(e_matrix, check_finite=False, compute_uv=False)
2386
+ sv_rat = np.max(sv_rat) / np.min(sv_rat)
2387
+ e_stats_dicts = {'mc': mc,
2388
+ 'sv_rat': sv_rat}
2389
+ stats_end = time.time()
2390
+ if verbose:
2391
+ print(f"Proc{str_pref} : > Efield stats computation took {stats_end - stats_start:2.2f} s.")
2392
+
2393
+ # fast return for testruns
2394
+ if fun == pynibs.expio.fit_funs.dummy_fun:
2395
+ c_all = np.random.random(n_elm)
2396
+ ret = (c_all,)
2397
+ if return_fits:
2398
+ best_values = [{"a": 1} for _ in range(n_elm)]
2399
+ ret += (best_values,)
2400
+ if return_e_field_stats:
2401
+ ret += (e_stats_dicts,)
2402
+ return ret
2403
+
2404
+ if verbose:
2405
+ print(f"Proc{str_pref}: > c-map for {e_matrix.shape[0]}: starting.")
2406
+ if return_progress:
2407
+ return_progress_lst = []
2408
+ # loop over electric field QOIs
2409
+ for i_q, q in enumerate(qoi_path_hdf5):
2410
+
2411
+ start = time.time()
2412
+ # get initial c-map
2413
+ start_sample = np.random.choice(elm_idx_list, int(.1 * len(elm_idx_list)), replace=False)
2414
+ # c, best_values = np.zeros(elm_idx_list.shape), np.zeros(elm_idx_list.shape).astype(object)
2415
+
2416
+ c, best_values = np.zeros(elm_idx_list.shape), init_vals.copy()
2417
+ if return_progress:
2418
+ return_progress_lst.append(c.copy())
2419
+ c[start_sample], best_values.iloc[start_sample] = fit_elms(
2420
+ elm_idx_list=start_sample,
2421
+ e_matrix=e_matrix,
2422
+ mep=mep,
2423
+ fun=fun,
2424
+ init_vals=init_vals,
2425
+ limits=limits,
2426
+ log_scale=log_scale,
2427
+ constants=constants,
2428
+ max_nfev=10,
2429
+ verbose=verbose,
2430
+ bad_elm_idx=bad_elm_idx,
2431
+ score_type=score_type)
2432
+ if return_progress:
2433
+ return_progress_lst.append(c.copy())
2434
+ stop = time.time()
2435
+
2436
+ if verbose:
2437
+ print(f"Proc{str_pref}: > Initial c-factor map done. ({stop - start:2.2f} s)")
2438
+
2439
+ # pick the best elements and compute their neibors' fits
2440
+ n_top = int(len(start_sample) * .2)
2441
+
2442
+ elms_done = start_sample[np.argpartition(c[start_sample], -n_top)[-n_top:]]
2443
+ mask = np.ones(c.shape, np.bool)
2444
+ mask[elms_done] = 0
2445
+ c[mask] = 0
2446
+ if return_progress:
2447
+ return_progress_lst.append(c.copy())
2448
+ elms_done = elms_done[c[elms_done] > 0]
2449
+ elms_seed = elms_done.copy()
2450
+ elms_done = set(elms_done)
2451
+
2452
+ params = inspect.getfullargspec(fun).args[1:]
2453
+ i_step = 0
2454
+ start = time.time()
2455
+ while True:
2456
+ # reorder to highest c first
2457
+ elms_seed = elms_seed[np.argsort(-c[elms_seed])]
2458
+ elm_to_compute = set()
2459
+
2460
+ # compute last 5% in one batch
2461
+ if geo is not None and len(elms_done) > 0.95 * len(elm_idx_list):
2462
+ elm_to_compute = list(set(elm_idx_list).difference(elms_done))
2463
+ print(f"last 5% {len(elms_done)} -> {len(elm_to_compute)}")
2464
+ if not elm_to_compute:
2465
+ break
2466
+
2467
+ for elm in elm_to_compute: # elm = list(elm_to_compute)[0]
2468
+ # use nearest element's params to fit this element
2469
+ nearest_idx = np.argmin(np.linalg.norm(np.mean(geo[con][c > 0], axis=1) -
2470
+ np.mean(geo[con[elm]], axis=0), axis=1))
2471
+ nearest_idx = np.argwhere(np.sum(con == con[c > 0][nearest_idx], axis=1) == 3)[0][0]
2472
+ for p in params:
2473
+ init_vals.iloc[elm][p] = best_values.iloc[nearest_idx][p]
2474
+
2475
+ else:
2476
+ # increase spread with increasing steps
2477
+ for _ in range(i_step + 1):
2478
+ for elm_done_i in elms_seed:
2479
+ # find neighbors for the already computed element
2480
+ mask = np.sum(np.isin(con, con[elm_done_i, :]), axis=1)
2481
+ neighbors = set(np.where((0 < mask) & (mask < 3))[0])
2482
+
2483
+ # remove elms we already have computed or don't want to compute
2484
+ neighbors.difference_update(elms_done, bad_elm_idx, elm_to_compute)
2485
+ neighbors.intersection_update(elm_idx_list)
2486
+ if not neighbors:
2487
+ continue
2488
+ elm_to_compute.update(neighbors)
2489
+ for neigh in neighbors:
2490
+ # use params of done neighbors as init vals for their neighbors
2491
+ for p in params:
2492
+ init_vals[p][neigh] = best_values.iloc[elm_done_i][p]
2493
+ best_values.iloc[neigh] = best_values.iloc[elm_done_i]
2494
+
2495
+ print(f"{i_step:0>3}: Seed {len(elms_seed)} -> {len(elm_to_compute)}")
2496
+
2497
+ # refit already here some elements
2498
+ not_fitted_elms = c[np.array(list(elms_done))] <= 0
2499
+ # get random neighbor to refit
2500
+ idx_disc, idx_neighbor = pynibs.get_indices_discontinuous_data(data=c[np.array(list(elms_done))],
2501
+ con=con[np.array(list(elms_done))],
2502
+ neighbor=True,
2503
+ deviation_factor=3, min_val=1e-12,
2504
+ not_fitted_elms=not_fitted_elms,
2505
+ crit='randmax',
2506
+ neigh_style='point')
2507
+ idx_disc = np.setdiff1d(idx_disc, bad_elm_idx)
2508
+ # set init values from neighbors best values
2509
+ for p in params:
2510
+ for idx_refit, idx_neighbor in zip(idx_disc, idx_neighbor):
2511
+ init_vals[p][idx_refit] = best_values.iloc[idx_neighbor][p]
2512
+ elm_to_compute.update(idx_disc)
2513
+ # save the elms that will be computed now for the next iteration as seed elements
2514
+ elms_seed = elm_to_compute.copy()
2515
+ elm_to_compute = list(elm_to_compute)
2516
+ elms_seed = np.array(list(elms_seed))
2517
+
2518
+ if not elm_to_compute:
2519
+ # end loop
2520
+ break
2521
+
2522
+ # start = time.time()
2523
+ c[elm_to_compute], best_values.iloc[elm_to_compute] = fit_elms(
2524
+ elm_idx_list=elm_to_compute,
2525
+ e_matrix=e_matrix,
2526
+ mep=mep,
2527
+ fun=fun,
2528
+ init_vals=init_vals,
2529
+ limits=limits,
2530
+ log_scale=log_scale,
2531
+ constants=constants,
2532
+ max_nfev=1,
2533
+ verbose=verbose,
2534
+ score_type=score_type)
2535
+ if return_progress:
2536
+ return_progress_lst.append(c.copy())
2537
+ i_step += 1
2538
+ elms_done.update(elm_to_compute)
2539
+ stop = time.time()
2540
+
2541
+ if verbose:
2542
+ print(
2543
+ f"Proc{str_pref}: > Stepdown iterations {i_step:0>2} "
2544
+ f"({len(elms_done)}/{n_elm - len(bad_elm_idx)} ({len(bad_elm_idx)} excluded) elements): done. "
2545
+ f"({stop - start:.2f} s "
2546
+ f"{np.sum(c[elm_to_compute] > 1e-6)} > 0)")
2547
+
2548
+ # refit the discontinues elemens
2549
+ not_fitted_elms = c == 0
2550
+ idx_disc, idx_neighbor = pynibs.get_indices_discontinuous_data(data=c, con=con[elm_idx_list], neighbor=True,
2551
+ deviation_factor=3, min_val=1e-12,
2552
+ not_fitted_elms=not_fitted_elms, crit='max',
2553
+ neigh_style='point')
2554
+ idx_disc = np.setdiff1d(idx_disc, bad_elm_idx)
2555
+
2556
+ disc_fit_i, last_disc_idx = 1, idx_disc
2557
+ # only repeat until no improvement
2558
+ while 0 < len(idx_disc) and disc_fit_i <= n_refit:
2559
+
2560
+ # set init values from neighbors best values
2561
+ for p in params:
2562
+ for idx_refit, idx_neighbor in zip(idx_disc, idx_neighbor):
2563
+ init_vals[p][idx_refit] = best_values.iloc[idx_neighbor][p]
2564
+
2565
+ start = time.time()
2566
+ c[idx_disc], best_values.iloc[idx_disc] = fit_elms(
2567
+ elm_idx_list=elm_idx_list[idx_disc],
2568
+ e_matrix=e_matrix,
2569
+ mep=mep,
2570
+ fun=fun,
2571
+ init_vals=init_vals,
2572
+ limits=limits,
2573
+ log_scale=log_scale,
2574
+ constants=constants,
2575
+ max_nfev=int(np.log2(disc_fit_i)), # inrease number of nfev each time
2576
+ verbose=verbose,
2577
+ score_type=score_type)
2578
+ if return_progress:
2579
+ return_progress_lst.append(c.copy())
2580
+
2581
+ # prepare the next iteration
2582
+ last_disc_n = len(idx_disc)
2583
+ idx_disc, idx_neighbor = pynibs.get_indices_discontinuous_data(data=c, con=con[elm_idx_list], neighbor=True,
2584
+ deviation_factor=3, min_val=1e-12,
2585
+ not_fitted_elms=not_fitted_elms, crit='max',
2586
+ neigh_style='point')
2587
+ idx_disc = np.setdiff1d(idx_disc, bad_elm_idx)
2588
+ if np.all(last_disc_idx == idx_disc):
2589
+ break
2590
+ stop = time.time()
2591
+
2592
+ if verbose:
2593
+ print(
2594
+ f"Proc{str_pref}: > Discontinuous refit {disc_fit_i:0>2} ({last_disc_n} elements): "
2595
+ f"done ({stop - start:.2f} s)")
2596
+ disc_fit_i += 1
2597
+
2598
+ # smooth data
2599
+ if smooth_data:
2600
+ smoothed_c = np.zeros_like(c)
2601
+ for i, c_dat in np.ndenumerate(c):
2602
+ mask = np.sum(np.isin(con, con[i, :]), axis=1)
2603
+ mask = np.logical_and(0 < mask, mask < 3)
2604
+ # if c_dat <= 0:
2605
+ if np.sum(c[mask] == 0) != 0:
2606
+ # if all elements are < 0
2607
+ smoothed_c[i] = 0.3 * np.mean(c[mask][c[mask] == 0]) + 0.7 * c_dat
2608
+ else:
2609
+ # otherwise just use good elements
2610
+ smoothed_c[i] = c_dat
2611
+ # else:
2612
+ # don't touch element if it's the only one with positive values
2613
+ # if len(c[mask][c[mask] > 0]) > 1:
2614
+ # smoothed_c[i] = 0.3 * np.mean(c[mask][c[mask] > 0]) + 0.7 * c_dat
2615
+ # else:
2616
+ # smoothed_c[i] = c_dat
2617
+ c = smoothed_c
2618
+ if return_progress:
2619
+ return_progress_lst.append(c.copy())
2620
+ c_all[:, i_q] = c.flatten()
2621
+
2622
+ endtime = time.time()
2623
+ print(f"Proc{str_pref}: > c-map for {len(zap_idx)} zaps done ({endtime - starttime:2.2f}s). ")
2624
+ if verbose:
2625
+ bad_elm_idx = list(bad_elm_idx)
2626
+ print(f"Proc{str_pref}: > min | med | max: "
2627
+ f"{np.min(np.delete(c, bad_elm_idx)):2.2f} | "
2628
+ f"{np.median(np.delete(c, bad_elm_idx)):2.2f} | "
2629
+ f"{np.max(np.delete(c, bad_elm_idx)):2.2f}. "
2630
+ f"{np.sum(c > 0)} elms > 0 | "
2631
+ f"{len(c)} n_elms")
2632
+ ret = {'c': c_all}
2633
+ if return_fits:
2634
+ ret['best_values'] = best_values
2635
+ if return_e_field_stats:
2636
+ ret['stats'] = e_stats_dicts
2637
+ if return_progress:
2638
+ ret['progress'] = return_progress_lst
2639
+ return ret
2640
+
2641
+
2642
def sing_elm_fitted(elm_idx_list, mep_lst, mep_params, e, alpha=1000, n_samples=100):
    """
    Mass-univariate ridge regressions on fitted MEP_{AMP} ~ E.
    That is, for each element in elm_idx_list, it's E (mag | norm | tan) for each zap regressed on the raw MEP
    amplitude. An element wise sklearn.metrics.regression.r2_score is returned.

    Parameters
    ----------
    elm_idx_list : np.ndarray
        (n_used_ele) List of element indices, the congruence factor is computed for.
    mep_lst : list of Mep object instances
        (n_conds) List of fitted Mep object instances for all conditions (see exp.py for more information).
    mep_params : np.ndarray of float
        (n_mep_params_total) List of all mep parameters of curve fits used to calculate the MEP accumulated into
        one array.(e.g.: [mep_#1_para_#1, mep_#1_para_#2, mep_#1_para_#3, mep_#2_para_#1, mep_#2_para_#1, ...])
    e : np.ndarray of float
        (n_elm, n_cond, n_qoi) Electric field to compute the r2 factor for, e.g. (e_mag, e_norm, e_tan).
    alpha : float, default=1000
        Regularization strength for the Ridge regressor. Only used if the (currently disabled) Ridge
        branch below is re-enabled; the active LinearRegression path ignores it.
    n_samples : int, default=100
        Number of data points to generate discrete mep and e curves.

    Returns
    -------
    r2 : np.ndarray of float
        (n_roi, n_datasets) R^2 for each element in elm_idx_list.
    """

    def cartesian_product(*arrays):
        """
        Fast implementation to get cartesian product of two arrays.

        cartesian_product([a,b,c],[2,3]) =
        [a, 2
         a, 3
         b, 2
         b, 3
         c, 2
         c, 3]
        """
        la = len(arrays)
        dtype = np.result_type(*arrays)
        arr = np.empty([len(a) for a in arrays] + [la], dtype=dtype)
        for i, a in enumerate(np.ix_(*arrays)):
            arr[..., i] = a
        return arr.reshape(-1, la)

    n_eqoi = e.shape[2]
    n_cond = e.shape[1]
    n_elm = e.shape[0]
    assert n_cond == len(mep_lst)

    # split the flat parameter vector into one chunk per condition (chunk size = popt size of that fit)
    mep_params = np.array(mep_params).flatten()
    mep_params_cond = []
    start_idx = 0
    for i_cond in range(n_cond):
        mep_params_cond.append(mep_params[start_idx:(start_idx + mep_lst[i_cond].popt.size)])
        start_idx = start_idx + mep_lst[i_cond].popt.size

    del start_idx

    intensities = []
    amplitudes = []

    reg_r2 = np.empty((n_elm, n_eqoi))

    # get amplitudes from fitted meps: sample each condition's fitted curve at n_samples intensities
    for i_cond in range(n_cond):
        intensities.append(np.linspace(mep_lst[i_cond].x_limits[0], mep_lst[i_cond].x_limits[1], n_samples))
        amplitudes.append(mep_lst[i_cond].eval(intensities[-1], mep_params_cond[i_cond]))
    amplitudes = np.array(amplitudes).flatten()

    for qoi_idx in range(n_eqoi):
        x = pd.DataFrame()
        index_shift = 0

        # BUGFIX: the loop variable used to be named ``mep_lst`` as well, clobbering the list of Mep
        # objects after the first qoi iteration. Iterate over the condition index instead.
        for mep_i in range(n_cond):
            # condition wise, as we stimulated with different intensities per conditions

            # for each element in roi, one datapoint for each zap.
            current = cartesian_product(e[:, mep_i, qoi_idx], intensities[mep_i])

            # index is iteration of zaps over all conditions
            index = cartesian_product(e[:, mep_i, qoi_idx], np.arange(n_samples))[:, 1]
            index += index_shift
            index_shift = index[-1] + 1

            # el is range(n_elements) * n_zaps_in_condition
            el_idx = np.repeat(np.arange(e.shape[0]), n_samples)

            # intensity * e
            e_zap = np.multiply(current[:, 0], current[:, 1])

            # put all together
            x_cond = pd.DataFrame(data={"index": index.astype(int),
                                        "el": el_idx,
                                        "e": e_zap})

            # DataFrame.append() was removed in pandas 2.0; pd.concat is the supported replacement.
            x = pd.concat([x, x_cond])
        # x.shape is now (n_zaps*n_elms, 3)

        # reshape to (n_zaps, n_elms)
        x = x.pivot(index="index", columns="el", values="e")  # this is pretty slow

        do_reg_poly = False
        do_reg_linear = True

        # reg = Ridge(alpha=alpha)
        # NOTE: ``normalize=True`` was removed from LinearRegression in scikit-learn 1.2. For plain OLS the
        # normalization only preconditioned the solver; predictions and R^2 are unchanged without it.
        reg = LinearRegression()
        if do_reg_poly:
            raise NotImplementedError

        elif do_reg_linear:
            # Do one regression per element.
            r_t = time.time()

            def get_score(x_i):
                """Helper function do be used by pd.apply() to speed up things.

                Parameters
                ----------
                x_i: pd.Series
                    Column with e for a single elm.

                Returns
                -------
                r2 for amplitudes ~ E
                """
                reg.fit(x_i.reshape(-1, 1), amplitudes)
                return reg.score(x_i.reshape(-1, 1), amplitudes)

            scores = x.apply(get_score, axis=0, raw=True)
            print("all_reg: {}".format(time.time() - r_t))

            reg_r2[:, qoi_idx] = np.array(scores)

    return reg_r2
2788
+
2789
+
2790
def logistic_regression():
    """
    Collection of ideas on how to improve the regression approach. Not implemented.

    1. De-log the data.

       The data range has to be transformed to a reasonable range. For a full sigmoid,
       ``-10:10`` looks OK:

       .. code-block:: python

           sig <- function(z) {
               return( 1 / (1 + exp(-z)))
           }

       .. code-block:: python

           desig <- function(x) {
               return(- log((1/x) - 1))
           }

       This might be a reasonably fast approach, but the parameter range has to be
       estimated. Maybe remove some outliers?

    2. Fit a logistic regression to the raw data.

       ``scipy.optimize`` provides ``curve_fit()``, which does OLS-ish fitting to a given
       function (https://stackoverflow.com/questions/54376900/fit-sigmoid-curve-in-python).
       This is expected to be rather slow.

    3. Use the sklearn ``logistic_regression`` classifier and access its raw fit data.

       ``logistic_regression`` is implemented as a classifier; maybe it is possible to use
       its regression fit results. The implementation should be pretty fast.

    Raises
    ------
    NotImplementedError
        Always -- this function only records design notes.
    """
    raise NotImplementedError
2823
+
2824
+
2825
def init(l, zap_lists, res_fn):
    """
    Pool initializer to use with regression_nl_hdf5_single_core_write().

    Publishes the shared lock, the zap index lists, and the results file name as
    module-level globals (``lock``, ``z``, ``fn``) so every worker process can reach them.

    Parameters
    ----------
    l : multiprocessing.Lock()
    zap_lists : list of list of int
        Which zaps to compute.
    res_fn : str
        .hdf5 fn
    """
    global lock, z, fn
    fn = res_fn
    z = zap_lists
    lock = l
2842
+
2843
+
2844
def single_fit(x, y, fun):
    """
    Performs a single fit and returns fit object.

    Parameters
    ----------
    x : ndarray of float
        x-values.
    y : ndarray of float
        y-values.
    fun : function
        Function for fitting (one of the ``pynibs.expio.fit_funs`` models).

    Returns
    -------
    fit : gmodel fit object
        Fit object.
    """
    ff = pynibs.expio.fit_funs

    # Per-model configuration: (fit on log10(y)?, parameter bounds, initial-value factory).
    # Initial values are wrapped in lambdas so they are only evaluated for the selected model,
    # and always on the *raw* y (before any log transform), as before.
    model_cfg = {
        ff.linear: (False,
                    {"m": [-100, 100], "n": [-100, 100]},
                    lambda: {"m": 0.3, "n": -1}),
        ff.linear_log: (True,
                        {"m": [-100, 100], "n": [-100, 100]},
                        lambda: {"m": 0.3, "n": -1}),
        ff.exp0: (False,
                  {"x0": [0, 1000], "r": [1e-12, 5]},
                  lambda: {"x0": np.mean(x), "r": 10 / np.max(x)}),
        ff.exp: (False,
                 {"x0": [0, 1000], "r": [1e-12, 5], "y0": [-.1, 5]},
                 lambda: {"x0": np.mean(x), "r": 10 / np.max(x), "y0": 0}),
        ff.exp0_log: (True,
                      {"x0": [1e-12, 1000], "r": [1e-12, 5], "y0": [-.1, 5]},
                      lambda: {"x0": np.mean(x), "r": 10 / np.max(x), "y0": 0}),
        ff.exp_log: (True,
                     {"x0": [1e-12, 1000], "r": [1e-12, 5], "y0": [-.1, 5]},
                     lambda: {"x0": np.mean(x), "r": 10 / np.max(x), "y0": 0}),
        ff.sigmoid: (False,
                     {"x0": [0, 1000], "amp": [1e-12, 1000], "r": [1e-12, 100]},
                     lambda: {"x0": np.mean(x), "amp": np.max(y), "r": 10 / np.max(x)}),
        ff.sigmoid4: (False,
                      {"x0": [0, 1000], "amp": [1e-12, 1000], "r": [1e-12, 100], "y0": [1e-12, 10]},
                      lambda: {"x0": np.mean(x), "amp": np.max(y), "r": 10 / np.max(x), "y0": 1e-2}),
        ff.sigmoid_log: (True,
                         {"x0": [0, 1000], "amp": [1e-12, 1000], "r": [1e-12, 100]},
                         lambda: {"x0": np.mean(x), "amp": np.max(y), "r": 10 / np.max(x)}),
        ff.sigmoid4_log: (True,
                          {"x0": [0, 1000], "amp": [1e-12, 100], "r": [1e-12, 10], "y0": [1e-6, 1e-1]},
                          lambda: {"x0": np.mean(x), "amp": np.max(y), "r": 10 / np.max(x), "y0": 1e-2}),
    }

    cfg = model_cfg.get(fun)
    if cfg is None:
        raise NotImplementedError(f"Function {fun} not implemented.")
    log_scale, limits, make_init_vals = cfg
    init_vals = make_init_vals()

    if log_scale:
        y = np.log10(y)

    # set up gmodel: one bounded, initialized hint per model parameter (skipping the x argument)
    gmodel = Model(fun)
    for p in inspect.getfullargspec(fun).args[1:]:
        gmodel.set_param_hint(p, value=init_vals[p], min=limits[p][0], max=limits[p][1])
    gmodel.make_params()

    # perform fit
    return gmodel.fit(y, x=x)