pyNIBS 0.2024.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. pyNIBS-0.2024.8.dist-info/LICENSE +623 -0
  2. pyNIBS-0.2024.8.dist-info/METADATA +723 -0
  3. pyNIBS-0.2024.8.dist-info/RECORD +107 -0
  4. pyNIBS-0.2024.8.dist-info/WHEEL +5 -0
  5. pyNIBS-0.2024.8.dist-info/top_level.txt +1 -0
  6. pynibs/__init__.py +34 -0
  7. pynibs/coil.py +1367 -0
  8. pynibs/congruence/__init__.py +15 -0
  9. pynibs/congruence/congruence.py +1108 -0
  10. pynibs/congruence/ext_metrics.py +257 -0
  11. pynibs/congruence/stimulation_threshold.py +318 -0
  12. pynibs/data/configuration_exp0.yaml +59 -0
  13. pynibs/data/configuration_linear_MEP.yaml +61 -0
  14. pynibs/data/configuration_linear_RT.yaml +61 -0
  15. pynibs/data/configuration_sigmoid4.yaml +68 -0
  16. pynibs/data/network mapping configuration/configuration guide.md +238 -0
  17. pynibs/data/network mapping configuration/configuration_TEMPLATE.yaml +42 -0
  18. pynibs/data/network mapping configuration/configuration_for_testing.yaml +43 -0
  19. pynibs/data/network mapping configuration/configuration_modelTMS.yaml +43 -0
  20. pynibs/data/network mapping configuration/configuration_reg_isi_05.yaml +43 -0
  21. pynibs/data/network mapping configuration/output_documentation.md +185 -0
  22. pynibs/data/network mapping configuration/recommendations_for_accuracy_threshold.md +77 -0
  23. pynibs/data/neuron/models/L23_PC_cADpyr_biphasic_v1.csv +1281 -0
  24. pynibs/data/neuron/models/L23_PC_cADpyr_monophasic_v1.csv +1281 -0
  25. pynibs/data/neuron/models/L4_LBC_biphasic_v1.csv +1281 -0
  26. pynibs/data/neuron/models/L4_LBC_monophasic_v1.csv +1281 -0
  27. pynibs/data/neuron/models/L4_NBC_biphasic_v1.csv +1281 -0
  28. pynibs/data/neuron/models/L4_NBC_monophasic_v1.csv +1281 -0
  29. pynibs/data/neuron/models/L4_SBC_biphasic_v1.csv +1281 -0
  30. pynibs/data/neuron/models/L4_SBC_monophasic_v1.csv +1281 -0
  31. pynibs/data/neuron/models/L5_TTPC2_cADpyr_biphasic_v1.csv +1281 -0
  32. pynibs/data/neuron/models/L5_TTPC2_cADpyr_monophasic_v1.csv +1281 -0
  33. pynibs/expio/Mep.py +1518 -0
  34. pynibs/expio/__init__.py +8 -0
  35. pynibs/expio/brainsight.py +979 -0
  36. pynibs/expio/brainvis.py +71 -0
  37. pynibs/expio/cobot.py +239 -0
  38. pynibs/expio/exp.py +1876 -0
  39. pynibs/expio/fit_funs.py +287 -0
  40. pynibs/expio/localite.py +1987 -0
  41. pynibs/expio/signal_ced.py +51 -0
  42. pynibs/expio/visor.py +624 -0
  43. pynibs/freesurfer.py +502 -0
  44. pynibs/hdf5_io/__init__.py +10 -0
  45. pynibs/hdf5_io/hdf5_io.py +1857 -0
  46. pynibs/hdf5_io/xdmf.py +1542 -0
  47. pynibs/mesh/__init__.py +3 -0
  48. pynibs/mesh/mesh_struct.py +1394 -0
  49. pynibs/mesh/transformations.py +866 -0
  50. pynibs/mesh/utils.py +1103 -0
  51. pynibs/models/_TMS.py +211 -0
  52. pynibs/models/__init__.py +0 -0
  53. pynibs/muap.py +392 -0
  54. pynibs/neuron/__init__.py +2 -0
  55. pynibs/neuron/neuron_regression.py +284 -0
  56. pynibs/neuron/util.py +58 -0
  57. pynibs/optimization/__init__.py +5 -0
  58. pynibs/optimization/multichannel.py +278 -0
  59. pynibs/optimization/opt_mep.py +152 -0
  60. pynibs/optimization/optimization.py +1445 -0
  61. pynibs/optimization/workhorses.py +698 -0
  62. pynibs/pckg/__init__.py +0 -0
  63. pynibs/pckg/biosig/biosig4c++-1.9.5.src_fixed.tar.gz +0 -0
  64. pynibs/pckg/libeep/__init__.py +0 -0
  65. pynibs/pckg/libeep/pyeep.so +0 -0
  66. pynibs/regression/__init__.py +11 -0
  67. pynibs/regression/dual_node_detection.py +2375 -0
  68. pynibs/regression/regression.py +2984 -0
  69. pynibs/regression/score_types.py +0 -0
  70. pynibs/roi/__init__.py +2 -0
  71. pynibs/roi/roi.py +895 -0
  72. pynibs/roi/roi_structs.py +1233 -0
  73. pynibs/subject.py +1009 -0
  74. pynibs/tensor_scaling.py +144 -0
  75. pynibs/tests/data/InstrumentMarker20200225163611937.xml +19 -0
  76. pynibs/tests/data/TriggerMarkers_Coil0_20200225163443682.xml +14 -0
  77. pynibs/tests/data/TriggerMarkers_Coil1_20200225170337572.xml +6373 -0
  78. pynibs/tests/data/Xdmf.dtd +89 -0
  79. pynibs/tests/data/brainsight_niiImage_nifticoord.txt +145 -0
  80. pynibs/tests/data/brainsight_niiImage_nifticoord_largefile.txt +1434 -0
  81. pynibs/tests/data/brainsight_niiImage_niifticoord_mixedtargets.txt +47 -0
  82. pynibs/tests/data/create_subject_testsub.py +332 -0
  83. pynibs/tests/data/data.hdf5 +0 -0
  84. pynibs/tests/data/geo.hdf5 +0 -0
  85. pynibs/tests/test_coil.py +474 -0
  86. pynibs/tests/test_elements2nodes.py +100 -0
  87. pynibs/tests/test_hdf5_io/test_xdmf.py +61 -0
  88. pynibs/tests/test_mesh_transformations.py +123 -0
  89. pynibs/tests/test_mesh_utils.py +143 -0
  90. pynibs/tests/test_nnav_imports.py +101 -0
  91. pynibs/tests/test_quality_measures.py +117 -0
  92. pynibs/tests/test_regressdata.py +289 -0
  93. pynibs/tests/test_roi.py +17 -0
  94. pynibs/tests/test_rotations.py +86 -0
  95. pynibs/tests/test_subject.py +71 -0
  96. pynibs/tests/test_util.py +24 -0
  97. pynibs/tms_pulse.py +34 -0
  98. pynibs/util/__init__.py +4 -0
  99. pynibs/util/dosing.py +233 -0
  100. pynibs/util/quality_measures.py +562 -0
  101. pynibs/util/rotations.py +340 -0
  102. pynibs/util/simnibs.py +763 -0
  103. pynibs/util/util.py +727 -0
  104. pynibs/visualization/__init__.py +2 -0
  105. pynibs/visualization/para.py +4372 -0
  106. pynibs/visualization/plot_2D.py +137 -0
  107. pynibs/visualization/render_3D.py +347 -0
@@ -0,0 +1,2375 @@
1
+ """
2
+ Contains functions for applying and testing single and dual node network detection. The two main functions are
3
+ - network_detection_algorithm_application(): to apply the NDA to experimental TMS data, and
4
+ - network_detection_algorithm_testing(): to generate artificial data and apply the NDA for testing reasons.
5
+ Details to all settings and options can be found in data/network mapping configuration/configuration guide.md.
6
+ """
7
import csv
import multiprocessing
import os
import time

import h5py
import lmfit
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.stats
import sklearn.feature_selection as fs
import trimesh
from numpy.random import default_rng
from scipy.signal import argrelextrema
from sklearn import tree
from sklearn.neighbors._kde import KernelDensity
from tqdm import tqdm

import pynibs
25
+
26
+ VERBOSE = False
27
+
28
+
29
def determine_coil_position_idcs(num_idcs, all_efields, rn_seed):
    """
    Select coil positions from the set of all available positions.

    Parameters
    ----------
    num_idcs : int
        How many coil positions to select (sample size).
    all_efields : ndarray of float
        (n_zaps_available, n_elms) Efields generated by all available coil positions.
    rn_seed : int
        Random seed used in selection.

    Returns
    -------
    selection : ndarray of int
        (n_zaps_used) Indices of chosen coil positions, drawn without replacement.
    """
    rn_generator = default_rng(seed=rn_seed)

    # Generator.choice with an int draws from arange(n) directly — no need to
    # materialize the index list first.
    return rn_generator.choice(all_efields.shape[0], size=num_idcs, replace=False)
54
+
55
+
56
def determine_detectable_hotspots(e_subset, rn_seed):
    """
    Randomly pick two candidate hotspot elements from an e-field matrix.

    Candidate pairs are re-drawn until (a) both elements reach a maximum
    e-field value above 1.0 and (b) the two elements are not too strongly
    correlated with each other (correlation coefficient < 0.5).
    Returns the indices of the hotspots-to-be.

    Parameters
    ----------
    e_subset : np.ndarray of float
        (n_zaps, n_elms) The efield magnitudes of the used coil positions across all ROI elements.
    rn_seed : int
        Random seed used for hotspot selection.

    Returns
    -------
    idx0 : int
        Index of chosen hotspot element 0.
    idx1 : int
        Index of chosen hotspot element 1.
    """
    e_max_thr = 1.0  # lower bound for maximum e-field value of a chosen hotspot (0: not limited) # hhh hardcoded values
    corr_thr = 0.5   # upper bound for correlation between the hotspots (1: not limited) # hhh

    np.random.seed(rn_seed)
    n_elms = e_subset.shape[1]

    while True:
        idx0 = np.random.choice(n_elms)
        idx1 = np.random.choice(n_elms)

        # accept the pair only if the two elements are sufficiently decorrelated
        # and both reach a sufficiently high field strength
        if (pynibs.compute_correlation_with_all_elements(e_subset, idx0)[idx1] < corr_thr
                and np.max(e_subset[:, idx0]) > e_max_thr
                and np.max(e_subset[:, idx1]) > e_max_thr):
            return idx0, idx1
88
+
89
+
90
def create_response_data(efields, config):
    """
    Create response data mimicking a chosen network.

    The mean (``loc``) of the sampling distribution is computed from the e-field
    values at the two hotspot elements via a network-type-specific effect
    formula; the spread (``scale``) depends on the chosen noise level.

    Parameters
    ----------
    efields : ndarray of float
        (n_zaps, n_elms) Efield generated by used coil positions.
    config : dict
        YAML-configuration file content as dictionary.

    Returns
    -------
    data : ndarray of float
        (n_zaps) Artificially generated response data array.

    Raises
    ------
    NotImplementedError
        If ``config['distribution_type']`` or ``config['network_type']`` is unknown.
    """
    n_zaps = efields.shape[0]
    data = np.zeros(n_zaps)

    rn_seed = config['rn_seed']
    network_type = config['network_type']
    distribution_type = config['distribution_type']
    jitter_ratio = config['jitter_ratio']
    jitter_scale = config['jitter_scale']
    effect_full = config['effect_full']
    effect_saturation = config['effect_saturation']
    hotspot_idcs = (config['hotspot_elm0'], config['hotspot_elm1'])
    efields_hot = efields[:, hotspot_idcs]
    rn_generator = default_rng(seed=rn_seed)
    np.random.seed(rn_seed)

    # choose distribution family (public frozen instances instead of the private
    # scipy.stats._continuous_distns classes) and the response scaling:
    #   response_shift + response_multiplier * effect  ->  expected response (loc)
    if distribution_type == 'normal':
        gen_dist = type(scipy.stats.norm)(name='gen_dist')
        response_shift = 1       # expected mean of N_0
        response_multiplier = 1  # e-mag * response_multiplier = expected effect (nominal values)
    elif distribution_type == 'logistic':
        gen_dist = type(scipy.stats.logistic)(name='gen_dist')
        response_shift = 0
        response_multiplier = 1
    elif distribution_type == 'ExGaussian':  # not fully implemented! Parameter K needed next to loc and scale
        gen_dist = type(scipy.stats.exponnorm)(name='gen_dist')
        response_shift = 400
        response_multiplier = 10
    else:
        raise NotImplementedError(f"Distribution type {distribution_type} not supported. "
                                  f"Please choose one of the following:"
                                  f" 'normal', 'logistic', 'ExGaussian' ")
    gen_dist.random_state = rn_generator

    # assign an effect indicator to each trial based on the hotspot e_mag values,
    # normalized by effect_full and capped at effect_saturation
    effect_indicator = np.minimum(efields_hot / effect_full, effect_saturation)

    # radical noise: jitter_ratio determines the percentage of data that shows a random effect in [0, effect_saturation]
    rd_size = int(np.round(n_zaps * jitter_ratio))
    rd_idcs = np.random.choice(effect_indicator.shape[0], size=rd_size)  # Get random indices
    effect_indicator[rd_idcs, 0] = np.random.uniform(0, effect_saturation, size=rd_size)
    effect_indicator[rd_idcs, 1] = np.random.uniform(0, effect_saturation, size=rd_size)

    # network-type -> effect formula on the two (capped) hotspot indicators.
    # XOR is |e0^2 - e1^2|, equivalent to the original branch on e0 > e1.
    effect_funs = {
        'NO': lambda e0, e1: 0.0,
        'AND': lambda e0, e1: (e0 * e1) ** 2,  # \in [0, 1.44]
        '1_INH_0': lambda e0, e1: max(e0 ** 2 - e1 ** 2, 0),
        'SH_0': lambda e0, e1: e0 ** 4,
        '0_INH_1': lambda e0, e1: max(e1 ** 2 - e0 ** 2, 0),
        'SH_1': lambda e0, e1: e1 ** 4,
        'XOR': lambda e0, e1: abs(e0 ** 2 - e1 ** 2),
        'OR': lambda e0, e1: (e0 ** 4 + e1 ** 4) / 2,
    }
    if network_type not in effect_funs:
        print("Selected network type not supported!")
        raise NotImplementedError()
    effect_fun = effect_funs[network_type]

    # generate one response per trial; draw order matches the original
    # implementation exactly (one rvs() per trial from rn_generator)
    for efield_idx in range(n_zaps):
        effect = effect_fun(effect_indicator[efield_idx, 0], effect_indicator[efield_idx, 1])
        loc = response_shift + response_multiplier * effect

        if distribution_type == 'ExGaussian':
            scale = (1 + effect / 4) * jitter_scale
            # NOTE(review): the original used K=50 for 'AND' and K=100 for all
            # other network types — preserved here; confirm whether intentional.
            k_param = 50 if network_type == 'AND' else 100
            idx_prob_dist = gen_dist(loc=loc, scale=scale, K=k_param)
        else:
            scale = (1 + effect / 2) * jitter_scale
            idx_prob_dist = gen_dist(loc=loc, scale=scale)

        data[efield_idx] = idx_prob_dist.rvs()

    return data
328
+
329
+
330
def binarize_response_data(response_values, method, bin_factor):
    """
    Binarize the response data according to the selected method.

    Parameters
    ----------
    response_values : numpy.ndarray
        (n_zaps) Response data.
    method : str
        Method of calculating the binarization threshold. ("mean", "slope", "median")
    bin_factor : float
        Factor multiplied with threshold for splitting the data.
        (bin_factor * method(response)) is the threshold used for categorizing the data.

    Returns
    -------
    response_bin : numpy.ndarray
        (n_zaps) Binarized response data, where values greater than the threshold are set to 1, and values
        less than or equal to the threshold are set to 0.

    Raises
    ------
    ValueError
        If an unknown ``method`` is passed.
    """
    # compute binarization threshold according to chosen method
    if method == 'mean':
        bin_thr = np.mean(response_values) * bin_factor
    elif method == 'median':
        bin_thr = np.median(response_values) * bin_factor
    elif method == 'slope':
        meps_sorted = np.sort(response_values)

        # sliding window averaging with window size N = 100.
        # This will result in a shorter array as the boundary values are
        # discarded to avoid boundary effects (sliding window is reaching
        # 'over' the boundaries of the array)
        meps_sorted_smoothed = np.convolve(meps_sorted, np.ones((100,)) / 100, mode='valid')
        num_discarded_values = meps_sorted.shape[0] - meps_sorted_smoothed.shape[0]

        meps_sorted_smoothed_gradient = np.gradient(meps_sorted_smoothed)

        # ignore the first and last 10 % of the values when searching for the
        # maximum slope (there is commonly a maximum towards the end)
        search_area = (
            int(meps_sorted_smoothed.shape[0] * 0.1),
            int(meps_sorted_smoothed.shape[0] - meps_sorted_smoothed.shape[0] * 0.1)
        )

        max_slope_idx = np.argmax(meps_sorted_smoothed_gradient[search_area[0]:search_area[1]])
        # map the smoothed-array index back onto the unsmoothed, sorted array
        max_slope_idx += int(num_discarded_values / 2) + search_area[0]

        bin_thr = meps_sorted[max_slope_idx] * bin_factor
    else:
        # previously an unknown method fell through to a NameError on bin_thr
        raise ValueError(f"Unknown binarization method '{method}'. "
                         f"Choose one of 'mean', 'median', 'slope'.")

    # vectorized binarization; float array of 0.0/1.0 as before
    return (response_values > bin_thr).astype(float)
388
+
389
+
390
def binarize_real_meps(meps, method='kde'):
    """
    Binarizes real MEP measurements.

    Parameters
    ----------
    meps : numpy.ndarray
        (n_zaps) The MEP measurements.
    method : str
        The method for determining the binarization threshold. Valid options are:
        - 'kde': Determine the threshold based on the density of the measurement points.
          The point density is low when the value of the MEPs is rapidly changing.
        - 'slope': Determine the threshold based on the derivative/slope.

    Returns
    -------
    numpy.ndarray
        An array of the same shape as `meps` with 1 for data points above the
        determined threshold (response) and 0 for data points below it (no response).

    Raises
    ------
    ValueError
        If an invalid `method` is selected.
    """
    if method == 'kde':
        # rule-of-thumb bandwidth estimate from the std and the IQR
        q25, q75 = np.percentile(meps, [25, 75])
        bandwidth = .9 * min(np.std(meps), q75 - q25) * pow(meps.shape[0], -0.2)

        kde = KernelDensity(
            kernel='gaussian',
            bandwidth=bandwidth
        ).fit(np.reshape(meps, (-1, 1)))

        x_grid = np.linspace(0, max(meps))
        values_on_grid = kde.score_samples(x_grid.reshape(-1, 1))

        # use the first occurrence (smallest/minimum index) of a minimum as threshold
        split_at = min(x_grid[argrelextrema(values_on_grid, np.less)[0]])

    elif method == 'slope':
        meps_sorted = np.sort(meps)

        # sliding window averaging with window size N = 100.
        # This will result in a shorter array as the boundary values are
        # discarded to avoid boundary effects (sliding window is reaching
        # 'over' the boundaries of the array)
        meps_sorted_smoothed = np.convolve(meps_sorted, np.ones((100,)) / 100, mode='valid')
        num_discarded_values = meps_sorted.shape[0] - meps_sorted_smoothed.shape[0]

        meps_sorted_smoothed_gradient = np.gradient(meps_sorted_smoothed)

        # ignore the first and last 10 % of the values when searching for the
        # maximum slope (there is commonly a maximum towards the end)
        search_area = (
            int(meps_sorted_smoothed.shape[0] * 0.1),
            int(meps_sorted_smoothed.shape[0] - meps_sorted_smoothed.shape[0] * 0.1)
        )

        max_slope_idx = np.argmax(meps_sorted_smoothed_gradient[search_area[0]:search_area[1]])
        # map the smoothed-array index back onto the unsmoothed, sorted array
        max_slope_idx += int(num_discarded_values / 2) + search_area[0]

        split_at = meps_sorted[max_slope_idx]

    else:
        # previously this only printed a warning and silently split at 0
        raise ValueError("Invalid 'method' selected. Choose 'kde' or 'slope'.")

    print(f"Split at: {split_at}")

    # to be consistent with the function "create_binary_MEP", we use
    # - 0 for values below the threshold (no response)
    # - 1 for values above the threshold (response)
    return np.where(meps > split_at, 1, 0)
474
+
475
+
476
def plot_data_std(response_data, e_field0, e_field1):
    """
    Scatter-plot the response data against the e-fields of the two hotspots.

    Parameters
    ----------
    response_data : np.ndarray
        (n_zaps) The response data values.
    e_field0 : np.ndarray
        The efields of hotspot 0.
    e_field1 : np.ndarray
        The efields of hotspot 1.

    Returns
    -------
    plt.pyplot
        The matplotlib.pyplot object containing the plot.
    """
    plt.close('all')

    # response value encoded as marker color
    plt.scatter(e_field0, e_field1, c=response_data, cmap='coolwarm', linewidth=0.5, edgecolor='grey')
    plt.xlabel('E-field $h_0$', fontsize=12, labelpad=4)
    plt.ylabel('E-field $h_1$', fontsize=12, labelpad=4)

    axes = plt.gca()
    for side, visible in (('right', False), ('top', False), ('left', True), ('bottom', True)):
        axes.spines[side].set_visible(visible)

    plt.colorbar()
    corr = scipy.stats.pearsonr(e_field0, e_field1)
    plt.title(f'{response_data.shape[0]} data points - '
              f'efield correlation: {np.round(corr[0], 2)}')
    return plt
513
+
514
+
515
def plot_data_bin(response_data_bin, e_field0, e_field1):
    """
    Scatter-plot the binarized response data against the e-fields of the two hotspots.

    Parameters
    ----------
    response_data_bin : np.ndarray
        (n_zaps) The binarized response data.
    e_field0 : np.ndarray
        The efields of hotspot 0.
    e_field1 : np.ndarray
        The efields of hotspot 1.

    Returns
    -------
    plt.pyplot
        The matplotlib.pyplot object containing the plot.
    """
    plt.close('all')

    # response category (0/1) encoded as marker color
    plt.scatter(e_field0, e_field1, c=response_data_bin, cmap='RdYlBu_r', vmin=-0.25, vmax=1.25,
                linewidth=0.5, alpha=0.95, edgecolors='grey')
    plt.xlabel('E-field $h_0$', fontsize=12, labelpad=4)
    plt.ylabel('E-field $h_1$', fontsize=12, labelpad=4)

    axes = plt.gca()
    for side, visible in (('right', False), ('top', False), ('left', True), ('bottom', True)):
        axes.spines[side].set_visible(visible)

    return plt
550
+
551
+
552
def plot_data_clf(response_data_bin, e_field0, e_field1):
    """
    Plot the binarized response data within axes representing the efields of the respective hotspots.
    The background of the scatterplot will be colored according to the trained classifier's
    predictions for the data. Also plots the structure of the decision tree used to classify the data.

    Parameters
    ----------
    response_data_bin : np.ndarray
        (n_zaps) The binarized response data.
    e_field0 : np.ndarray
        The efields of hotspot 0.
    e_field1 : np.ndarray
        The efields of hotspot 1.

    Returns
    -------
    Tuple[plt.pyplot, plt.pyplot]
        A tuple containing the matplotlib.pyplot objects for the decision tree and for the scatterplot.
    """
    plt.close('all')
    x = e_field0
    y = e_field1
    z = response_data_bin

    # set up classifier; each leaf must hold at least 5 % of the samples
    num_coil_samples = response_data_bin.shape[0]
    min_samples_leaf = int(0.05 * num_coil_samples)
    print(f'min_samples_leaf for plotting: {min_samples_leaf}')

    clf = tree.DecisionTreeClassifier(max_depth=2, min_samples_leaf=min_samples_leaf)
    samples = np.vstack((e_field0, e_field1)).transpose()
    res = clf.fit(samples, response_data_bin)
    score = clf.score(samples, response_data_bin)

    print(f'accuracy: {score}')
    print(f'feature_importances_: {res.feature_importances_}')
    print(f'tree_.feature: {clf.tree_.feature}')

    # plot decision tree
    tree_plt = plt.figure()
    tree.plot_tree(clf)
    plt.close('all')

    # determine boundaries of the classification overlay
    grid_x_min, grid_x_max = np.min(e_field0), np.max(e_field0)
    grid_y_min, grid_y_max = np.min(e_field1), np.max(e_field1)
    # define a regular grid between the boundaries of the data
    grid_x_coords, grid_y_coords = \
        np.meshgrid(np.arange(grid_x_min, grid_x_max + 0.005, .005),
                    np.arange(grid_y_min, grid_y_max + 0.005, .005))
    # have the classifier determine the class of the grid points
    grid_points_predicted = clf.predict(
        np.c_[grid_x_coords.ravel(), grid_y_coords.ravel()]
    )
    # put the result into a color plot
    grid_points_predicted = grid_points_predicted.reshape(grid_x_coords.shape)

    # create plot_clf: scatter of the data with the classifier's decision regions underneath
    plt.scatter(x, y, c=z, cmap='RdYlBu_r', vmin=-0.25, vmax=1.25, linewidth=0.5, alpha=0.95, edgecolors='grey')
    xmin, xmax = plt.xlim()
    ymin, ymax = plt.ylim()
    plt.contourf(grid_x_coords, grid_y_coords, grid_points_predicted, cmap='RdYlBu_r', alpha=0.15)
    plt.xlim(xmin, xmax)
    plt.ylim(ymin, ymax)

    plt.xlabel('E-field $h_0$', fontsize=12, labelpad=4)
    # bugfix: the y-axis label previously read 'E-field $h_0$' (copy-paste error)
    plt.ylabel('E-field $h_1$', fontsize=12, labelpad=4)
    plt.gca().spines['right'].set_visible(False)
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['left'].set_visible(True)
    plt.gca().spines['bottom'].set_visible(True)
    plt.title(f'Classification Accuracy: {np.round(score, 2)}')

    return tree_plt, plt
632
+
633
+
634
def write_effect_map_hdf5(datatype, e_matrix, roi_surf, detection_result, base_path, config):
    """
    Create effect maps for result validation.

    The idea is that given a detection result, especially the hotspot_idcs and network_type,
    a prediction can be made about where the response is supposed to be affected most.
    Comparing these predictions and real response measurements is a possibility for result validation.
    See output_documentation.md for more.

    Parameters
    ----------
    datatype : str
        Whether the response data is 'real' or 'artificial'.
    e_matrix : np.ndarray of float
        (n_zaps, n_elms) The efield magnitudes of all available coil positions across all ROI elements.
    roi_surf : ROI obj
        ROI surface object.
    detection_result : np.ndarray
        (7) contains the result of the detection, consisting of
        (found_network_type, found_idcs, found_acc, found_distance, found_scores, network_type_certainty,
        shape_vector).
    base_path : str
        Path to the folder where results should end up.
    config : dict
        YAML configuration file content as a dictionary.

    Raises
    ------
    NotImplementedError
        If the detected network type is not one of 1..8.
    """
    print("Effect values are being calculated.")

    effect_full = config['effect_full']
    effect_saturation = config['effect_saturation']

    # calculate effect for each element based on the hotspot e_mag values
    network_type = detection_result[0]
    found_idcs = detection_result[1]
    effect = np.zeros(e_matrix.shape[1])
    for elm in range(e_matrix.shape[1]):
        # 'selection' represents the specific sample of the available samples where element 'elm' is
        # stimulated highest (a loose approximation of what would happen if this element was stimulated)
        selection = np.argmax(e_matrix[:, elm])
        e_subset = e_matrix[selection]

        # can also go with different stimulation position from here, instead of the proxy.
        # BUGFIX: the per-hotspot effect values are scalars; they were previously initialized as
        # full-length zero arrays, which broke the scalar assignment to effect[elm] below whenever
        # one of the hotspot indices was NaN (i.e. that hotspot was not found).
        effect_vals = [0.0, 0.0]
        for idx in (0, 1):
            if not np.isnan(found_idcs[idx]):
                # indices may arrive as floats alongside the NaN placeholders -> cast for indexing
                effect_vals[idx] = min(e_subset[int(found_idcs[idx])] / effect_full, effect_saturation)

        if network_type == 1:
            effect[elm] = 0
        elif network_type == 2:
            effect[elm] = (effect_vals[0] * effect_vals[1]) ** 2  # \in [0, 1.44]
        elif network_type == 3:
            effect[elm] = max((effect_vals[0] ** 2 - effect_vals[1] ** 2), 0)
        elif network_type == 4:
            effect[elm] = effect_vals[0] ** 4
        elif network_type == 5:
            effect[elm] = max((effect_vals[1] ** 2 - effect_vals[0] ** 2), 0)
        elif network_type == 6:
            effect[elm] = effect_vals[1] ** 4
        elif network_type == 7:
            if effect_vals[0] > effect_vals[1]:
                effect[elm] = effect_vals[0] ** 2 - effect_vals[1] ** 2
            else:
                effect[elm] = effect_vals[1] ** 2 - effect_vals[0] ** 2
        elif network_type == 8:
            effect[elm] = (effect_vals[0] ** 4 + effect_vals[1] ** 4) / 2
        else:
            raise NotImplementedError()

    # output naming
    if datatype == 'artificial':
        jitter = (config['jitter_ratio'], config['jitter_scale'])
        rn_seed = config['rn_seed']
        flag = f"jitter_{jitter}_seed_{rn_seed}"
    else:
        flag = config['fn_flag']
    fn_out_roi_effectmap = f"{base_path}/effectmap_{flag}.hdf5"
    fn_out_roi_geo = f"{base_path}/effectmap_{flag}_geo.hdf5"

    # save data as hdf5 _geo file (mapped); retried in a loop because the files may be
    # temporarily locked by another process/viewer
    print(" > Creating .hdf5 geo files (mapped brain and roi) ...")
    while True:
        try:
            pynibs.write_geo_hdf5_surf(out_fn=fn_out_roi_geo,
                                       points=roi_surf.node_coord_mid,
                                       con=roi_surf.node_number_list,
                                       replace=True,
                                       hdf5_path='/mesh')
            pynibs.write_data_hdf5_surf(data=effect,
                                        data_names=['effect_map'],
                                        data_hdf_fn_out=fn_out_roi_effectmap,
                                        geo_hdf_fn=fn_out_roi_geo,
                                        replace=True)
            break
        except Exception:  # was a bare except; keep Ctrl-C / SystemExit working
            print('problem accessing effectmap hdf5')
            time.sleep(1)

    print(f'Saved in folder: {base_path} \n **** \n ')
732
+
733
+
734
def write_network_detection_data_hdf5(datatype, e_matrix, response_values, base_path, config):
    """
    Preprocess experimental data for the network detection algorithm.

    Either artificial response data is generated or real response data is used. In either case, the
    data is binarized (classified into 1: affected, and 0: not affected). Different data plotting
    options are included.

    Parameters
    ----------
    datatype : str
        Whether response data needs to be generated ('artificial') or real experimental data is available ('real').
    e_matrix : np.ndarray of float
        (n_zaps, n_elms) The efield magnitudes of all available coil positions across all ROI elements.
    response_values : np.ndarray
        Real response values if available, or 'None' in artificial case.
    base_path : str
        Path to the folder where results should end up.
    config : dict
        YAML configuration file content as a dictionary.

    Returns
    -------
    e_subset : np.ndarray of float
        (n_zaps, n_elms) The efield magnitudes of the used coil positions across all ROI elements.
    response : np.ndarray
        (2, n_zaps) Response values and binarized values in the form [response_values, response_binarized].

    Raises
    ------
    ValueError
        If ``datatype`` is neither 'artificial' nor 'real'.
    """
    start = time.time()

    if datatype == 'artificial':
        print('Artificial response data will now be generated and binarized.')
        num_coil_samples = config['sample_size']
        jitter = (config['jitter_ratio'], config['jitter_scale'])
        rn_seed = config['rn_seed']
        hotspot_idcs = (config['hotspot_elm0'], config['hotspot_elm1'])
        fn_out_data = f"{base_path}/data_jitter_{jitter}_seed_{rn_seed}.hdf5"  # nnn

        # (1.1) Pick e-fields as random samples from coil positions and orientations
        idcs = pynibs.determine_coil_position_idcs(num_idcs=num_coil_samples, all_efields=e_matrix, rn_seed=rn_seed)
        e_subset = e_matrix[idcs]

        # (1.2) generate response data
        response_values = pynibs.create_response_data(efields=e_subset, config=config)
    elif datatype == 'real':
        print('Real data will now be binarized.')
        flag = config['fn_flag']
        fn_out_data = f"{base_path}/data_{flag}.hdf5"  # nnn
        e_subset = e_matrix
    else:
        # BUGFIX: an unknown datatype previously surfaced only later as a NameError on fn_out_data
        raise ValueError(f"Unknown datatype '{datatype}'; expected 'artificial' or 'real'.")

    # (2) binarize the (generated or measured) responses
    response_bin = pynibs.binarize_response_data(response_values, config['bin_method'],
                                                 bin_factor=config['bin_factor'])
    response = np.vstack((response_values, response_bin))
    stop = time.time()
    runtime_gen = np.round(stop - start, 2)

    # (3) save data hdf5; retried in a loop because the file may be temporarily locked
    if config['save_files']:
        os.makedirs(base_path, exist_ok=True)
        while True:
            try:
                with h5py.File(fn_out_data, 'w') as f:
                    f.create_dataset(
                        'response_data',
                        data=response
                    )
                    f.create_dataset(
                        'e_subset',
                        data=e_subset
                    )
                break
            except Exception:  # was a bare except; keep Ctrl-C / SystemExit working
                print('problem accessing data hdf5')
                time.sleep(1)
        print(f'Saved data in {runtime_gen} s under:{fn_out_data} \n **** \n ')

    # (4) plot artificial response data on plain spanned by both hotspot efields
    if datatype == 'artificial':
        flag = f'jitter_{jitter}_seed_{rn_seed}_hotspots'
        if config['plot_std']:
            std_plt = pynibs.plot_data_std(response[0], e_subset[:, hotspot_idcs[0]], e_subset[:, hotspot_idcs[1]])
            fn_std_plt = os.path.join(base_path,
                                      f'plot_std_{flag}.png')  # nnn
            std_plt.savefig(fn_std_plt, dpi=600)
            std_plt.close()
        if config['plot_bin']:
            bin_plt = pynibs.plot_data_bin(response[1], e_subset[:, hotspot_idcs[0]], e_subset[:, hotspot_idcs[1]])
            fn_bin_plt = os.path.join(base_path,
                                      f'plot_bin_{flag}.png')  # nnn
            bin_plt.savefig(fn_bin_plt, dpi=600)
            bin_plt.close()
        if config['plot_curves']:
            plot_idx0 = hotspot_idcs[0]

            plt_curve = pynibs.plot_data_bin(np.zeros(response[1].shape), e_subset[:, plot_idx0], response[0])
            plt_curve.ylabel('response')
            plt_curve.savefig(f'{base_path}/plot_{plot_idx0}_curve_{flag}.png', dpi=600)
            plt_curve.close()

            plot_idx1 = hotspot_idcs[1]

            plt_curve1 = pynibs.plot_data_bin(np.zeros(response[1].shape), e_subset[:, plot_idx1], response[0])
            plt_curve1.ylabel('response')
            # BUGFIX: the label previously read 'E-field $h_1' -- the unbalanced '$' breaks
            # matplotlib's mathtext rendering of the axis label
            plt_curve1.xlabel('E-field $h_1$')
            plt_curve1.savefig(f'{base_path}/plot_{plot_idx1}_curve_{flag}.png', dpi=600)
            plt_curve1.close()

    return e_subset, response
845
+
846
+
847
def determine_scoring_idcs(e_subset, scoring_emag_thr=0, scoring_interval=22, method='optimized', fn_geo=None,
                           required_idcs=None):
    """
    Compute which elements to consider in the score calculations.

    Parameters
    ----------
    e_subset : np.ndarray of float
        (n_zaps, n_elms) The efield magnitudes of the used coil positions across all ROI elements.
    scoring_emag_thr : float, default: 0
        Threshold to control stimulation accessibility of the scored elements. Elements with a stimulation peak
        below this threshold are ignored. (high thr -> only gyral elements are scored).
        (method 'optimized' not possible with an active threshold in place.)
    scoring_interval : int, default: 22
        The resolution for selecting elements. Only every scoring_interval-th element is considered.
    method : str, default: 'optimized'
        Specify method for selecting scoring indices.

        ** Random elements across the ROI are picked ('random'),
        ** Optimal elements are chosen by maximizing the distances between them. ('optimized') (Only applicable
        without scoring emag threshold. Requires fn_geo.)

    fn_geo : str, optional
        Required to compute element distances if method='optimized'.
    required_idcs : np.ndarray of int, optional
        Any indices that should definitely be included in scoring process, regardless of subsampling. (e.g. hotspots)

    Returns
    -------
    np.ndarray
        Array of indices representing the elements to consider in the score calculations.

    Raises
    ------
    ValueError
        If ``method`` is neither 'random' nor 'optimized' (and subsampling is requested).
    """
    # full resolution requested: every element is scored, no subsampling needed
    if scoring_interval == 1:
        return np.arange(e_subset.shape[1])

    num_subsample = int(e_subset.shape[1] / scoring_interval)

    if method == 'random':
        # get maximum e-value over all data manifestations per element to apply the scoring
        # e-field threshold. Where max > thr: 1, where max < thr: 0
        candidate_mask = np.where(np.max(e_subset[:, :], 0) > scoring_emag_thr, 1, 0)
        scoring_idcs = np.nonzero(candidate_mask)[0]

        # now apply scoring interval: select only every scoring_interval-th element
        scoring_idcs = scoring_idcs[scoring_idcs % scoring_interval == 0]
    elif method == 'optimized':
        # subsample the ROI surface evenly; requires the geometry file for the mesh
        with h5py.File(fn_geo, 'r') as f:
            mesh = trimesh.Trimesh(
                vertices=f['mesh/nodes/node_coord'][:],
                faces=f['mesh/elm/triangle_number_list'][:]
            )

        pts, scoring_idcs = trimesh.sample.sample_surface_even(
            mesh=mesh,
            count=num_subsample)
    else:
        # BUGFIX: an unknown method previously fell through to a NameError on scoring_idcs
        raise ValueError(f"Unknown method '{method}'; expected 'random' or 'optimized'.")

    # add additional desired indices (e.g. hotspots), regardless of the subsampling above
    if required_idcs is not None:
        scoring_idcs = np.append(scoring_idcs, required_idcs)
    scoring_idcs = np.sort(scoring_idcs)

    return scoring_idcs
911
+
912
+
913
def compute_scores_with_single_element_clf(element_idx, efields, data, weights=None, scoring_idcs=None):
    """
    Computes the classifier (DecisionTree) score of element 'element_idx' with all other elements within the ROI.

    Parameters
    ----------
    element_idx : int
        The index of the element whose score with all other elements should be computed.
    efields : np.ndarray
        (n_zaps, n_elms) The efields in the ROI of all investigated coil positions.
    data : np.ndarray
        (n_zaps) The response corresponding to each coil position.
    weights : np.ndarray, optional
        (n_zaps) Weights to weight the data points.
    scoring_idcs : np.ndarray, optional
        Indices of ROI elements to compute scores for. Decreasing the number of elements significantly enhances
        runtime. If None, scores will be computed for all ROI elements.

    Returns
    -------
    tuple
        A tuple with the index of the investigated element and an array containing the scores of this element with
        each other array element.
    """
    n_elms = efields.shape[1]
    result = np.zeros(n_elms)

    # leaf size scales with the number of zaps (at least 1) to avoid overfitting tiny leaves
    n_zaps = data.shape[0]
    leaf_size = max(int(0.05 * n_zaps), 1)

    # one shared classifier instance, refit per element pair  # ccc
    # tried different parameter settings and adding max_leaf_nodes=3, didn't improve performance
    classifier = tree.DecisionTreeClassifier(max_depth=2, min_samples_leaf=leaf_size)

    candidates = range(n_elms) if scoring_idcs is None else scoring_idcs

    # only the upper triangle (partner index > element_idx) is scored
    for partner in candidates:
        if partner <= element_idx:
            continue
        features = np.column_stack((efields[:, element_idx], efields[:, partner]))
        classifier.fit(features, data, sample_weight=weights)
        result[partner] = classifier.score(features, data)

    return element_idx, result
956
+
957
+
958
def compute_scores_with_single_element_regression(element_idx, efields, data, weights=None, scoring_idcs=None):
    """
    Computes the multivariate regression R2-score of element 'element_idx' with all other elements within the ROI.

    Parameters
    ----------
    element_idx : int
        The index of the element whose score with all other elements should be computed.
    efields : np.ndarray
        (n_zaps, n_elms) The efields in the ROI of all investigated coil positions.
    data : np.ndarray
        (n_zaps) The response corresponding to each coil position.
    weights : np.ndarray, optional
        (n_zaps) Weights to weight the data points.
        NOTE(review): accepted for API symmetry with the clf variant but not used by the fit — confirm intended.
    scoring_idcs : np.ndarray, optional
        Indices of ROI elements to compute scores for. Decreasing the number of elements significantly enhances
        runtime. If None, scores will be computed for all ROI elements.

    Returns
    -------
    tuple
        A tuple with the index of the investigated element and an array containing the scores of this element with
        each other array element.
    """
    dim = efields.shape[1]
    scores = np.zeros(dim)
    num_coil_samples = data.shape[0]
    min_samples_leaf = max(int(0.05 * num_coil_samples), 1)
    if VERBOSE:
        print(f'min_samples_leaf for scoring: {min_samples_leaf}')

    # hoisted out of the loop (loop-invariant): the response variance and the model object are
    # identical for every scored pair; previously both were rebuilt per iteration
    data_var = np.var(data)

    def gaussian_multi(x, y, amp=1, x0=0, y0=0, sigma_x=1, sigma_y=1):
        # 2D Gaussian surface fitted to the (e-field, e-field) -> response relation
        return amp * np.exp(-0.5 * (((x - x0) / sigma_x) ** 2 + ((y - y0) / sigma_y) ** 2))

    model = lmfit.Model(gaussian_multi, independent_vars=['x', 'y'])

    elmts_to_iterate_over = scoring_idcs if scoring_idcs is not None else range(dim)

    # only the upper triangle (i > element_idx) is scored
    for i in elmts_to_iterate_over:
        if i > element_idx:
            fit = model.fit(data, x=efields[:, element_idx], y=efields[:, i])
            # R2-like score: fraction of response variance explained by the fit
            scores[i] = 1 - np.var(fit.residual) / data_var

    return element_idx, scores
1016
+
1017
+
1018
def compute_scores_with_all_elements(efields, data, weights=None, scoring_idcs=None, scoring_method='clf'):
    """
    Computes the classifier (DecisionTree) score of each combination of ROI elements
    (upper triangle matrix of the score matrix).

    Parameters
    ----------
    efields : np.ndarray of float
        (n_zaps, n_elms) The efields in the ROI of all investigated coil positions.
    data : np.ndarray
        (2, n_zaps) The response corresponding to each zap; data[0] are the raw responses,
        data[1] their binarization.
    weights : np.ndarray, optional
        (n_zaps) Weights to weight the data points.
    scoring_idcs : np.ndarray, optional
        Indices of ROI elements to compute scores for. Decreasing the number of elements significantly enhances
        runtime. If None, scores will be computed for all ROI elements.
    scoring_method : str, optional
        Select 'clf' for Decision Tree Classifier or 'regression' for multivariable regression method.

    Returns
    -------
    np.ndarray of float
        (n_elms, n_elms) An upper triangle matrix containing the scores of each ROI element with all others.

    Raises
    ------
    ValueError
        If ``scoring_method`` is neither 'clf' nor 'regression'.
    """
    dim = efields.shape[1]
    scores = np.zeros((dim, dim))

    if scoring_method == 'clf':
        # classifier scoring operates on the binarized responses (data[1])
        for j in range(dim):
            element_idx, scores_row = pynibs.compute_scores_with_single_element_clf(
                j, efields, data[1], weights, scoring_idcs)
            scores[element_idx] = scores_row
    elif scoring_method == 'regression':
        # regression scoring operates on the raw responses (data[0])
        for j in range(dim):
            element_idx, scores_row = pynibs.compute_scores_with_single_element_regression(
                j, efields, data[0], weights, scoring_idcs)
            scores[element_idx] = scores_row
    else:
        # BUGFIX: unknown methods previously returned an all-zero score matrix silently
        raise ValueError(f"Unknown scoring_method '{scoring_method}'; expected 'clf' or 'regression'.")

    return scores
1056
+
1057
+
1058
def compute_scores_with_all_elements_MP(efields, data, weights=None, scoring_idcs=None, scoring_method='clf'):
    """
    Computes the classifier (DecisionTree) score of each combination of ROI elements
    (multi-core enabled, upper triangle matrix of the score matrix).

    Parameters
    ----------
    efields : np.ndarray of float
        (n_zaps, n_elms) The efields in the ROI of all investigated coil positions.
    data : np.ndarray of float
        (2, n_zaps) The response corresponding to each coil position; data[0] are the raw responses,
        data[1] their binarization.
    weights : np.ndarray, optional
        (n_zaps) Weights to weight the data points.
    scoring_idcs : np.ndarray, optional
        Indices of ROI elements to compute scores for. Decreasing the number of elements significantly enhances
        runtime. If None, scores will be computed for all ROI elements.
    scoring_method : str, optional
        Select 'clf' for Decision Tree Classifier or 'regression' for multivariable regression method.

    Returns
    -------
    np.ndarray of float
        An upper triangle matrix containing the scores of each ROI element with all others.

    Raises
    ------
    ValueError
        If ``scoring_method`` is neither 'clf' nor 'regression'.
    """
    dim = efields.shape[1]
    scores = np.zeros((dim, dim))

    elmts_to_iterate_over = scoring_idcs if scoring_idcs is not None else range(dim)

    # both methods share the same multiprocessing fan-out; they differ only in the worker function
    # and in whether the binarized (data[1], 'clf') or raw (data[0], 'regression') responses are used
    if scoring_method == 'clf':
        worker = pynibs.compute_scores_with_single_element_clf
        responses = data[1]
    elif scoring_method == 'regression':
        worker = pynibs.compute_scores_with_single_element_regression
        responses = data[0]
    else:
        # BUGFIX: unknown methods previously fell through to a NameError on mp_res below
        raise ValueError(f"Unknown scoring_method '{scoring_method}'; expected 'clf' or 'regression'.")

    num_processes = multiprocessing.cpu_count()
    with multiprocessing.Pool(processes=num_processes) as pool:
        # @TODO: monitor if chunksize=1 impairs performance; it was set to 1 to have a smoother
        # tqdm-progress bar.
        mp_res = pool.starmap(
            worker,
            tqdm(
                [(j, efields, responses, weights, scoring_idcs) for j in elmts_to_iterate_over],
                total=len(elmts_to_iterate_over)
            ),
            chunksize=1
        )
        pool.close()
        pool.join()

    # assemble the upper triangle matrix from the per-element result rows
    for res in mp_res:
        scores[res[0]] = res[1]

    return scores
1124
+
1125
+
1126
def hotspots_by_score_cumulative(scores, num_desired_elements=100):
    """
    Determine the hotspot elements within the ROI based on the cumulative scores.
    Mark the elements with the highest cumulative scores as hotspots.

    Parameters
    ----------
    scores : np.ndarray
        (n_elms, n_elms) Upper triangle matrix of the scores.
    num_desired_elements : int, default: 100
        Number of ROI elements that should be marked as hotspots. Clamped to the number of
        available elements. (Generalized from a hard-coded constant: previously ROIs with
        <= 100 elements crashed in np.argpartition.)

    Returns
    -------
    hotspots : np.ndarray
        Array with dimensions equal to the number of ROI elements, where hotspot elements carry
        their cumulative score and all others are 0.
    hotspot_idcs : np.ndarray
        Indices of the hotspot elements.
    """
    n_elms = scores.shape[0]
    # sum the scores across each row/column
    cumulative_scores = np.sum(scores, axis=0)

    # clamp so small ROIs don't make argpartition raise
    n_top = min(num_desired_elements, n_elms)
    if n_top >= n_elms:
        # all elements qualify
        hotspot_idcs = np.argsort(-cumulative_scores)
    else:
        # find the top n_top cumulative scores
        hotspot_idcs = np.argpartition(-cumulative_scores, n_top)[:n_top]

    hotspots = np.zeros(n_elms)
    hotspots[hotspot_idcs] = cumulative_scores[hotspot_idcs]

    return hotspots, hotspot_idcs
1151
+
1152
+
1153
def hotspots_by_score_percentiles(scores, accumulated=False):
    """
    Determine the hotspot elements within the ROI based on the upper percentile of scores.
    Mark the top elements as hotspot elements.

    Parameters
    ----------
    scores : np.ndarray
        Upper triangle matrix of the scores.
    accumulated : bool, optional
        Whether to accumulate the number of interacting hotspot elements.
        If True, the number in the hotspot array at location i denotes the number of other hotspot elements
        this element is associated with. If False, hotspots are marked with a value of 1. Default is False.

    Returns
    -------
    hotspots : np.ndarray
        Array with dimensions equal to the number of ROI elements, where elements are marked as hotspots.
    hotspot_idcs : np.ndarray
        (2, n_hits) Row/column indices of the score-matrix entries above the percentile threshold.
    """
    # choose number of score accuracies that should contribute to the computed percentile
    num_desired_elements = 100
    # each score is associated with two elements - so if we want n elements as hotspots,
    # we must threshold n/2 scores. (n x n matrix of scores)
    fraction_from_all_elements = (num_desired_elements / (scores.shape[0] ** 2)) * 100
    # NOTE: num_desired_elements doesn't have the exact desired effect, but the threshold still works well.
    # BUGFIX: for small ROIs (n^2 < num_desired_elements) the percentile argument went negative and
    # np.percentile raised -- clamp it to 0 (then every element is marked).
    percentile = np.percentile(scores, max(100 - fraction_from_all_elements, 0))

    hotspot_idcs = np.array(np.where(scores >= percentile))
    hotspots = np.zeros(scores.shape[0])

    if accumulated:
        # count how many above-threshold pairs each element participates in
        np.add.at(hotspots, hotspot_idcs.flatten(), 1)
    else:
        hotspots[hotspot_idcs.flatten()] = 1

    return hotspots, hotspot_idcs
1189
+
1190
+
1191
def write_hotspot_scoremap_hdf5(datatype, e_subset, data, roi_surf, fn_geo, base_path, config, required_idcs=None):
    """
    Calculate the hotspot scores for given TMS experimental data and save scoremap and hotspot scores in hdf5.
    The hotspot scores indicate which ROI elements are most likely to be involved in the effect on the response
    variable. How they are computed depends on the scoring_method specified in the configuration file.

    Parameters
    ----------
    datatype : str
        Whether the response data is 'real' or 'artificial'.
    e_subset : np.ndarray of float
        (n_zaps, n_elms) The efield magnitudes of the used coil positions across all ROI elements.
    data : np.ndarray of float
        (2, n_zaps) Two arrays, data[0] contains the response corresponding to each coil position, data[1] contains
        its binarization (1: response affected, 0: response not affected).
    roi_surf : ROI obj
        ROI surface object
    fn_geo : str
        Path to the geo.hdf5-file.
    base_path : str
        Path to the folder where results should end up.
    config : dict
        YAML configuration file content as a dictionary.
    required_idcs : np.ndarray of int, optional
        Any indices that should definitely be included in scoring process, regardless of subsampling. (e.g. hotspots)

    Returns
    -------
    runtime_scores : float
        Running time of the score computation in seconds.
    scores : np.ndarray
        The computed scores: a (n_elms, n_elms) matrix for the dual-node methods, a per-element
        array for the single-node methods ('regress_data', 'mi').
    hotspots : np.ndarray
        (n_elms) Hotspot score per ROI element.
    """
    print("Hotspot scores are being computed.")
    start = time.time()

    # resolve the filename flag from the configuration depending on the data origin
    if datatype == 'artificial':
        jitter = (config['jitter_ratio'], config['jitter_scale'])
        rn_seed = config['rn_seed']
        hotspot_idcs = (config['hotspot_elm0'], config['hotspot_elm1'])
        flag = f'jitter_{jitter}_seed_{rn_seed}'
    elif datatype == 'real':
        flag = config['fn_flag']
    else:
        raise NotImplementedError()

    if config['save_files']:
        # output file naming
        fn_out_roi_geo = f"{base_path}/res_hotspots_{flag}_geo.hdf5"
        fn_out_roi_hotpot_data = f"{base_path}/res_hotspots_{flag}.hdf5"
        fn_out_roi_scoremap = f"{base_path}/res_scoremap_{flag}.hdf5"

    # select the relevant idcs (subsampled set of ROI elements to score)
    scoring_emag_thr = config['scoring_emag_thr']
    scoring_interval = config['scoring_interval']
    scoring_idcs = pynibs.determine_scoring_idcs(e_subset, scoring_emag_thr, scoring_interval,
                                                 method='optimized', fn_geo=fn_geo, required_idcs=required_idcs)

    # calculate scores and potential hotspots
    # SINGLE NODE METHODS: regress_data and mutual information score
    if config['scoring_method'] == 'regress_data':  # only for single node case
        con = roi_surf.node_number_list
        score_map = np.zeros(e_subset.shape[1])
        # per-element R2 of a regression of the raw responses (data[0]) on the element's e-field
        score_map[scoring_idcs] = pynibs.regress_data(e_matrix=e_subset,
                                                      mep=data[0],
                                                      elm_idx_list=scoring_idcs,
                                                      # fun='linear',
                                                      n_cpu=160,
                                                      con=con,
                                                      n_refit=0,
                                                      return_fits=False,
                                                      score_type='R2',
                                                      verbose=True,
                                                      pool=None,
                                                      refit_discontinuities=False,
                                                      select_signed_data=False)
        scores = score_map
        hotspots = score_map  # only because in dual node approaches, this variable is needed

    elif config['scoring_method'] == 'mi':  # only for single node case
        # Compute Mutual Information between each (subsampled) element's e-field and the raw responses
        mi_scores = fs.mutual_info_regression(e_subset[:, scoring_idcs], data[0])
        # scatter the subsampled MI scores back into a full-length array
        scores = np.zeros(e_subset.shape[1])
        scores[scoring_idcs] = mi_scores
        hotspots = scores  # only because in dual node approaches, this variable is needed
        score_map = scores

    # DUAL NODE METHODS: classifier and regression based scoring
    else:  # dual nodes expected
        scores = pynibs.compute_scores_with_all_elements_MP(e_subset, data, scoring_idcs=scoring_idcs,
                                                            scoring_method=config['scoring_method'])

        # per-element score: best score this element reaches with any partner element
        score_map = np.max(scores, axis=0)

        hotspots = pynibs.hotspots_by_score_percentiles(scores, accumulated=True)[0]

    if config['save_files']:
        # save data as hdf5 _geo file (mapped); retried in a loop because the files may be
        # temporarily locked by another process/viewer
        print(" > Creating .hdf5 geo files (mapped brain and roi) ...")
        while True:
            try:
                pynibs.write_geo_hdf5_surf(out_fn=fn_out_roi_geo,
                                           points=roi_surf.node_coord_mid,
                                           con=roi_surf.node_number_list,
                                           replace=True,
                                           hdf5_path='/mesh')
                pynibs.write_data_hdf5_surf(data=hotspots,
                                            data_names='res_hotspots',
                                            data_hdf_fn_out=fn_out_roi_hotpot_data,
                                            geo_hdf_fn=fn_out_roi_geo,
                                            replace=True)
                pynibs.write_data_hdf5_surf(data=score_map,
                                            data_names='res_scoremap',
                                            data_hdf_fn_out=fn_out_roi_scoremap,
                                            geo_hdf_fn=fn_out_roi_geo,
                                            replace=True)
                break
            except:
                print('problem writing score hdf5')
                time.sleep(1)

    stop = time.time()
    runtime_scores = np.round(stop - start, 2)
    print(f'Calculated scores in in {runtime_scores} s and saved in folder: {base_path} \n **** \n ')
    return runtime_scores, scores, hotspots
1321
+
1322
+
1323
def compute_correlation_with_all_elements(e_subset, elm_idx):
    """
    Calculate the Pearson correlation coefficient of elm_idx with every other element.

    Parameters
    ----------
    e_subset : np.ndarray of float
        (n_zaps, n_elms) The efield magnitudes of the used coil positions across all ROI elements.
    elm_idx : int
        The index of the element to compute the correlation with.

    Returns
    -------
    corr_coeff : np.ndarray
        (n_elms) Array containing the absolute Pearson correlation coefficients of elm_idx with each other element.
    """
    n_elms = e_subset.shape[1]
    corr_coeff = np.zeros(n_elms)
    x = e_subset[:, elm_idx]
    for idx in range(n_elms):
        y = e_subset[:, idx]
        if y.shape[0] > 1:
            # pearsonr needs at least two samples; the sign is irrelevant here, only the
            # strength of the (anti-)correlation matters
            pearson_res = scipy.stats.pearsonr(x, y)
            corr_coeff[idx] = abs(pearson_res[0])
        else:
            # single-sample fallback. BUGFIX: previously a length-1 boolean *array* (x == y) was
            # assigned to a scalar slot, which is deprecated/an error under newer numpy
            corr_coeff[idx] = float(x[0] == y[0])
    return corr_coeff
1350
+
1351
+
1352
def find_distinct_hotspots(scorematrix, hotspot_mask, e_subset, acc_thr, corr_thr):
    """
    Returns the potential hotspots according to the scores on the ROI. The two elements with the highest scores
    whose e-fields are not too correlated to each other are chosen.
    If one score is very high, a single-hotspot case is assumed.

    Parameters
    ----------
    scorematrix : np.ndarray
        (n_elms, n_elms) Upper triangle score matrix containing the scores of each ROI element with all others.
    hotspot_mask : np.ndarray
        (n_elms) Hotspot score of every element, derived from the scorematrix. Not modified
        (the search operates on an internal copy).
    e_subset : np.ndarray of float
        (n_zaps, n_elms) The efield magnitudes of the used coil positions across all ROI elements.
    acc_thr : float
        Threshold for accuracy criterion.
    corr_thr : float
        Threshold for correlation criterion.

    Returns
    -------
    hotspot_idcs : list
        List containing the indices of the potential hotspots.
    hotspot_scores : list
        List containing the scores of the potential hotspots.
    hotspot_acc : list
        List containing the accuracy values of the potential hotspots.
    found_bool : list
        List indicating whether each potential hotspot was found.
    """
    hotspot_idcs = [np.nan, np.nan]
    hotspot_scores = [np.nan, np.nan]
    hotspot_acc = [np.nan, np.nan]
    found_bool = [False, False]

    # BUGFIX: work on a copy -- the search zeroes out entries while it runs, and previously
    # this clobbered the caller's hotspot_mask array in place
    hotspot_mask = np.array(hotspot_mask, copy=True)

    # first hotspot: take the max score, if multiple maximums the one with the max accuracy

    # Find the indices of the maximum values in hotspot_mask
    # (flattened so each idx is a scalar; int() on size-1 arrays is deprecated in newer numpy)
    max_indices = np.argwhere(hotspot_mask == np.max(hotspot_mask)).flatten()
    # Create a list of (index, accuracy) pairs
    hotspot_info = [(idx, np.nanmax(scorematrix[:, idx])) for idx in max_indices]
    # Find the entry with the highest accuracy
    max_entry = max(hotspot_info, key=lambda entry: entry[1])
    # Extract the max_index
    hotspot_idcs[0] = int(max_entry[0])
    hotspot_acc[0] = max_entry[1]
    hotspot_scores[0] = hotspot_mask[hotspot_idcs[0]]
    # check whether hotspot 0 meets hotspot criteria: accuracy threshold and score threshold.
    # The score threshold is hardcoded because it is mostly to keep very strong single hotspot cases from
    # running too long: they often have one hotspot with a very high score and every other element has score 1.
    found_bool[0] = bool(hotspot_scores[0] >= 2 and hotspot_acc[0] >= acc_thr)

    # second hotspot: next maximum score checking hotspot and correlation criteria
    if found_bool[0]:
        hotspot_mask[hotspot_idcs[0]] = 0
        correlations = pynibs.compute_correlation_with_all_elements(e_subset, hotspot_idcs[0])

        while np.max(hotspot_mask) > 1 and not found_bool[1]:
            # Find the indices of the maximum values in hotspot_mask
            max_indices = np.argwhere(hotspot_mask == np.max(hotspot_mask)).flatten()
            # NOTE(review): the first hotspot ranks candidates by the column maximum
            # (scorematrix[:, idx]) while this loop uses the row maximum (scorematrix[idx]);
            # kept as-is -- confirm whether this asymmetry is intended for the triangular matrix
            hotspot_info = [(idx, np.max(scorematrix[idx])) for idx in max_indices]
            # Find the entry with the highest accuracy
            max_entry = max(hotspot_info, key=lambda entry: entry[1])
            # Extract the max_index, this is the second hotspot candidate
            hot_candi = int(max_entry[0])
            hot_candi_acc = max_entry[1]
            hot_candi_scores = hotspot_mask[hot_candi]

            # check whether hotspot candidate meets hotspot criteria, if not: ignore this one and keep looking
            if correlations[hot_candi] < corr_thr and bool(hot_candi_scores >= 2 and hot_candi_acc >= acc_thr):
                hotspot_idcs[1] = hot_candi
                hotspot_scores[1] = hot_candi_scores
                hotspot_acc[1] = hot_candi_acc
                found_bool[1] = True
            else:
                hotspot_mask[hot_candi] = 0  # ignore this one

    return hotspot_idcs, hotspot_scores, hotspot_acc, found_bool
1431
+
1432
+
1433
def find_distinct_single_hotspot(hotspot_mask, acc_thr):
    """
    Return the single hotspot candidate according to the scores on the ROI.

    The element with the highest score is chosen; a single-hotspot case is assumed
    when one score dominates. Only slot 0 of the returned lists is ever filled here,
    slot 1 stays NaN/False so the result has the same layout as the dual-hotspot search.

    Parameters
    ----------
    hotspot_mask : np.ndarray
        (n_elms) Hotspot score of every element, derived from the scorematrix.
    acc_thr : float
        Threshold for accuracy criteria.

    Returns
    -------
    hotspot_idcs : list
        List containing the indices of the potential hotspots.
    hotspot_scores : list
        List containing the scores of the potential hotspots.
    hotspot_acc : list
        List containing the accuracy values of the potential hotspots.
    found_bool : list
        List indicating whether each potential hotspot was found.
    """
    # two-slot result layout shared with the dual-hotspot variant
    hotspot_idcs = [np.nan, np.nan]
    hotspot_scores = [np.nan, np.nan]
    hotspot_acc = [np.nan, np.nan]
    found_bool = [False, False]

    # pick the element with the highest hotspot score (highest R2)
    best_idx = int(np.argmax(hotspot_mask))
    best_score = hotspot_mask[best_idx]

    hotspot_idcs[0] = best_idx
    # for the single-node approach, score and accuracy are the same quantity
    hotspot_acc[0] = best_score
    hotspot_scores[0] = best_score
    found_bool[0] = bool(best_score >= acc_thr)

    return hotspot_idcs, hotspot_scores, hotspot_acc, found_bool
1470
+
1471
+
1472
def calc_dist_pairwise(fn_geo, idx0, idx1):
    """
    Return the geodesic distance from ROI element idx0 to idx1.

    In some rare cases at the border of the ROI, the geodesic distance cannot be
    measured or rather is infinite. In this case, the euclidean measure is used as
    an approximation.

    Parameters
    ----------
    fn_geo : str
        Path to the geo.hdf5-file.
    idx0 : int
        Index of the first ROI element.
    idx1 : int
        Index of the second ROI element.

    Returns
    -------
    distance : float
        Geodesic distance between the two ROI elements.
    """
    # Fix: open the HDF5 file via a context manager so the handle is always
    # closed (the previous version leaked the open file object).
    with h5py.File(fn_geo, 'r') as roi_geo_h5:
        roi_tris = roi_geo_h5["mesh/elm/triangle_number_list"][:]
        roi_nodes = roi_geo_h5["mesh/nodes/node_coord"][:]

    # geodesic distances from element idx0 to all elements ([1] selects the
    # distance array from the helper's return tuple)
    distances = pynibs.geodesic_dist(
        tris=roi_tris,
        nodes=roi_nodes,
        source=idx0,
        source_is_node=False
    )[1]
    distance = distances[idx1]

    if distance == np.inf:
        # geodesic path does not exist (can happen at the ROI border);
        # fall back to the euclidean distance as an approximation
        print(f'****** Attention! ****** \n'
              f'Distance between {idx0} and {idx1} could not be measured geodesically, euclidean distance is used. \n'
              f'******************')
        distances = pynibs.euclidean_dist(
            tris=roi_tris,
            nodes=roi_nodes,
            source=idx0,
            source_is_node=False
        )[1]
        distance = distances[idx1]

    return distance
1517
+
1518
+
1519
def assign_found_hotspot_single(fn_geo, hotspot_idcs, found_idx):
    """
    Assign a single found hotspot to the nearest of the two real hotspots.

    Parameters
    ----------
    fn_geo : str
        Path to the geo.hdf5-file.
    hotspot_idcs : list
        A list of length 2 containing the indices of the real hotspots.
    found_idx : int
        The index of the found hotspot.

    Returns
    -------
    tuple
        (distance between the assigned real hotspot and the found hotspot,
        rounded to 2 decimals; index 0 or 1 of the assigned real hotspot).
    """
    print('Hotspot is assigned.')

    # distances from the found hotspot to real hotspot 0 and real hotspot 1
    dist_to_first = pynibs.calc_dist_pairwise(fn_geo, hotspot_idcs[0], found_idx)
    dist_to_second = pynibs.calc_dist_pairwise(fn_geo, hotspot_idcs[1], found_idx)

    # ties go to the first real hotspot (<= comparison)
    if dist_to_first <= dist_to_second:
        return np.round(dist_to_first, 2), 0
    return np.round(dist_to_second, 2), 1
1550
+
1551
+
1552
def assign_found_hotspots(fn_geo, hotspot_idcs, found_idcs):
    """
    Assign two found hotspots to the two real hotspots such that the total
    assignment distance is minimal.

    Parameters
    ----------
    fn_geo : str
        Path to the geo.hdf5-file.
    hotspot_idcs : list
        A list of length 2 containing the indices of the real hotspots.
    found_idcs : list
        A list of length 2 containing the indices of the found hotspots.

    Returns
    -------
    tuple
        (distance real hotspot 0 <-> its assigned found hotspot,
        distance real hotspot 1 <-> its assigned found hotspot,
        found hotspot indices reordered to match the real hotspots).
    """
    print('Hotspots are assigned.')

    # pairwise geodesic distances; a/b = real hotspots, c/d = found hotspots
    dist_ac = pynibs.calc_dist_pairwise(fn_geo, hotspot_idcs[0], found_idcs[0])
    dist_bc = pynibs.calc_dist_pairwise(fn_geo, hotspot_idcs[1], found_idcs[0])
    dist_ad = pynibs.calc_dist_pairwise(fn_geo, hotspot_idcs[0], found_idcs[1])
    dist_bd = pynibs.calc_dist_pairwise(fn_geo, hotspot_idcs[1], found_idcs[1])

    # keep the given order when (a<->c, b<->d) is the cheaper matching
    keep_order = dist_ac + dist_bd <= dist_bc + dist_ad
    if keep_order:
        return np.round(dist_ac, 2), np.round(dist_bd, 2), found_idcs
    # otherwise cross-assign and report the found indices in swapped order
    return np.round(dist_ad, 2), np.round(dist_bc, 2), [found_idcs[1], found_idcs[0]]
1586
+
1587
+
1588
def get_quadrant_samples(e_field0, e_field1):
    """
    Return one point in each of the four representative stimulation states based on
    the provided electric field values. The stimulation states for two elements are
    all four possible combinations of the states "stimulated" and "not stimulated".

    Quadrant layout (x = e_field0, y = e_field1)::

        2 | 4
        ------
        1 | 3

    Parameters
    ----------
    e_field0 : np.ndarray
        (n_zaps) The electric field values of element 0.
    e_field1 : np.ndarray
        (n_zaps) The electric field values of element 1.

    Returns
    -------
    np.ndarray
        (4, 2) array with the (x, y) coordinates of the chosen point per quadrant,
        in the order: bottom left, top left, bottom right, top right.
    """
    lo0, hi0 = min(e_field0), max(e_field0)
    lo1, hi1 = min(e_field1), max(e_field1)

    # Offset of 1/24 of the data range per axis shifts the four sample points
    # slightly toward the middle; without it they would sit exactly on the
    # corners of the rectangle spanned by the data.
    h = (hi0 - lo0) / 24
    v = (hi1 - lo1) / 24

    # Build the 4x2 result directly instead of growing it with repeated
    # np.append calls (same values, clearer and avoids reallocation).
    return np.array([
        [lo0 + h, lo1 + v],  # quadrant 1 (bottom left)
        [lo0 + h, hi1 - v],  # quadrant 2 (top left)
        [hi0 - h, lo1 + v],  # quadrant 3 (bottom right)
        [hi0 - h, hi1 - v],  # quadrant 4 (top right)
    ])
1630
+
1631
+
1632
def identify_shape(idx, scorematrix, e_subset, response):
    """
    Network type vectors are computed to determine most probable network type (vectors called shape_0 and shape_1).
    The predictions of decision tree classifiers trained on elements with high scores are used here to compute the most
    probable network type the hotspot element idx_h is involved in. At most 500 element pairs are considered.

    Parameters
    ----------
    idx : int
        Index of the hotspot element in question.
    scorematrix : np.ndarray of float
        (n_elms, n_elms) Upper triangle score matrix containing one score for every combination of elements.
    e_subset : np.ndarray of float
        (n_zaps, n_elms) The efield magnitudes of the used coil positions across all ROI elements.
    response : np.ndarray
        (2, n_zaps) Response data: first entry original value, second entry binarized value.

    Returns
    -------
    shape : np.ndarray of int
        Vector indicating shape of decision trees used in scoring the element in question (network type vector).
    """
    # require at least 5% of the samples (but never less than 1) per tree leaf
    num_coil_samples = int(response[0].shape[0])
    min_samples_leaf = max(int(0.05*num_coil_samples), 1)
    if VERBOSE:
        print(f'min_samples_leaf for identify_shape: {min_samples_leaf}')

    # shape parameter has 9 entries, ignore shape[0], shape[1:8] represent the 8 network types
    shape = np.zeros(9)

    # starts as the scalar idx; np.append below turns it into an array
    iterate_idcs = idx

    # get indices of those elements that 'gave' element 'idx' its hotspot score (meaning: with these elements a score
    # higher than a calculated threshold was achieved)
    # because those are the elements the network identification is based on
    hotspot_idcs = pynibs.hotspots_by_score_percentiles(scorematrix, accumulated=False)[1]
    hotspot_pairs = hotspot_idcs.transpose()
    for i in np.arange(0, hotspot_pairs.shape[0], 1):
        # collect the partner element of every high-score pair that contains idx
        if hotspot_pairs[i, 0] == idx:
            iterate_idcs = np.append(iterate_idcs, hotspot_pairs[i, 1])
        elif hotspot_pairs[i, 1] == idx:
            iterate_idcs = np.append(iterate_idcs, hotspot_pairs[i, 0])

    # if number of pairs is >500: only take the highest 500 scores as an indicator
    # NOTE(review): the check is 'i > 1000' although the comment says ">500" — 'i' is
    # the loop variable from the pair loop above (total pairs scanned, not pairs
    # matching idx). If hotspot_pairs is empty this raises NameError; presumably the
    # caller guarantees at least one pair — TODO confirm both points upstream.
    if i > 1000:
        iterate_idcs = np.argpartition(scorematrix[idx], -500)[-500:]
        print(f'Because of a exceptionally many elements element {idx} has a high accuracy with, only the 500 ones with'
              ' highest accuracy were used for network type computation.')
        shape[0]=-500

    # drop idx itself so only partner elements are iterated
    elmts_to_iterate_over = iterate_idcs[iterate_idcs!=idx]

    clf = tree.DecisionTreeClassifier(max_depth=2, min_samples_leaf=min_samples_leaf) # ccc
    # tried different parameter settings and adding max_leaf_nodes=3, didn't improve performance

    # calc classifier for every one of those elements
    for i in elmts_to_iterate_over:

        e_field0 = e_subset[:, idx]
        e_field1 = e_subset[:, i]
        if VERBOSE:
            print(f'index {idx} x {i}:')

        # fit the tree on the 2D e-field plane vs. the binarized response
        stacked_efields = np.vstack((e_field0, e_field1)).transpose()
        clf.fit(stacked_efields, response[1])

        # calculate prediction for every quadrant and translate into shape parameter
        # shape parameter has 9 entries, ignore shape[0], shape[1:8] represent the 8 network types
        q_samples = get_quadrant_samples(e_field0, e_field1)
        pred = clf.predict(q_samples)
        # binary encode the quadrant predictions (top-right, bottom-right, top-left)
        # into a network type id 1..8
        network_nr = 1 + pred[3] + 2*pred[2] + 4*pred[1]
        # shape[1] += pred[0] - worth a try
        # could use pred[0] for multiple things, like help distinguish shape 5 and 6
        shape[int(network_nr)] +=1

    if VERBOSE:
        print(shape)

    return shape
1711
+
1712
+
1713
def identify_network_type(found_idcs, scorematrix, e_subset, response):
    """
    Identify the most probable network type for the found hotspots.

    One network type vector per hotspot is computed via identify_shape()
    (vectors called shape_0 and shape_1), the vectors are combined and the
    dominant entry determines the network type.

    Parameters
    ----------
    found_idcs : list
        A list containing the indices of the found hotspots.
    scorematrix : np.ndarray
        (n_elms, n_elms) Upper triangle score matrix containing the scores of each ROI element with all others.
    e_subset : np.ndarray of float
        (n_zaps, n_elms) The efield magnitudes of the used coil positions across all ROI elements.
    response : np.ndarray
        (2, n_zaps) Response data: first entry original value, second entry binarized value.

    Returns
    -------
    found_network_type : int
        The identified network type.
    shape_vector : np.ndarray
        Combined network type vector of both hotspots.
    network_type_certainty : float
        Certainty level: shape value of the most probable network divided by
        the value of the second most probable network, subtracted from 1.
    """
    # network type vector for each hotspot
    shape_0 = pynibs.identify_shape(found_idcs[0], scorematrix, e_subset, response)
    shape_1 = pynibs.identify_shape(found_idcs[1], scorematrix, e_subset, response)

    # hotspot 1 sits on the y axis of the quadrant logic the shape parameter is
    # based on, so its asymmetric types are mirrored: swap 3<->5 and 4<->6
    shape_1[3], shape_1[5] = shape_1[5], shape_1[3]
    shape_1[4], shape_1[6] = shape_1[6], shape_1[4]

    shape = np.add(shape_0, shape_1)
    shape_vector = shape
    print(f'Shape in sum: {shape}')

    # most probable type is the index of the largest combined entry
    found_network_type = np.argmax(shape)

    # certainty: how far the runner-up falls short of the winner
    runner_up = np.max(np.delete(shape, np.argmax(shape)))
    network_type_certainty = 1 - (runner_up / np.max(shape))

    return found_network_type, shape_vector, network_type_certainty
1762
+
1763
+
1764
def evaluate_network_identification(hotspot_dist, real_network_type, found_network_type):
    """
    Return 1 if found_network_type is correct, 0 if not.

    The single-hotspot types SH_0 (4) and SH_1 (6) are tolerated to be mixed up
    when the two real hotspots are close together, since in that case the two
    single-hotspot interpretations are practically indistinguishable.

    Parameters
    ----------
    hotspot_dist : float
        Distance between hotspots.
    real_network_type : int
        Real network type.
    found_network_type : int
        Found network type.

    Returns
    -------
    result : int
        1 if the network identification counts as correct, 0 otherwise.
    """
    # exact match counts as correct
    # (renamed from 'eval' to avoid shadowing the builtin of the same name)
    result = int(real_network_type == found_network_type)

    # cases SH_0 and SH_1 are tolerated to be mixed up if the second supposed hotspot is close to the first one
    pair = (real_network_type, found_network_type)
    if pair in ((4, 6), (6, 4)) and hotspot_dist < 20:  # hhh hardcoded value here
        result = 1

    return result
1791
+
1792
+
1793
def whole_network_detection(e_subset, response_data, scorematrix, hotspot_mask, base_path, config):
    """
    Based on the hotspot scores, 0-2 hotspot candidates are chosen. (Localization)
    If 0 are viable to be a hotspot, the result is a pseudonetwork. If only 1 viable hotspot was found, it is a single
    hotspot (type 4/6).
    If 2 hotspot candidates are found, the network type (1)-(8) is identified. (Identification)

    Parameters
    ----------
    e_subset : np.ndarray of float
        (n_zaps, n_elms) The efield magnitudes of the used coil positions across all ROI elements.
    response_data : np.ndarray of float
        (2, n_zaps) Two arrays, data[0] contains the response corresponding to each coil position, data[1] contains its
        binarization (1: response affected, 0: response not affected).
    scorematrix : np.ndarray
        (n_elms, n_elms) Upper triangle score matrix containing the scores of each ROI element with all others.
    hotspot_mask : np.ndarray
        (n_elms) Hotspot score of every element, derived from the scorematrix.
    base_path : str
        The base path for the files (plots are written here).
    config : dict
        A dictionary containing the configuration parameters. Keys read here:
        'acc_thr', 'corr_thr', 'scoring_method', 'plot_std', 'plot_bin', 'plot_curves'.

    Returns
    -------
    runtime_detection : float
        Running time of network detection.
    detection_result : np.ndarray
        (7) contains the result of the detection, consisting of
        (found_network_type, found_idcs, found_acc, found_distance, found_scores, network_type_certainty, shape_vector).
    """
    print("Hotspots are localized and network identified. \n **** \n")
    start = time.time()

    acc_thr = config['acc_thr']
    corr_thr = config['corr_thr']

    # defaults for results that are only computed in the two-hotspot branch
    network_type_certainty = np.nan
    shape_vector = np.full(9, np.nan)
    found_distance = [np.nan, np.nan]
    if config['scoring_method'] == 'regress_data' or config['scoring_method'] == 'mi':  # single node approaches
        found_idcs, found_scores, found_acc, found_bool = \
            pynibs.find_distinct_single_hotspot(hotspot_mask, acc_thr)
    else:  # dual node approach
        found_idcs, found_scores, found_acc, found_bool = \
            pynibs.find_distinct_hotspots(scorematrix, hotspot_mask, e_subset, acc_thr, corr_thr)

    # 1 hotspot found -> single-hotspot network, type 4 (SH_0)
    if found_bool[0] and not found_bool[1]:
        print(f'A single-hotspot was detected.')
        found_network_type = 4
        print(f'Identified single hotspot: {found_idcs[0]}')

    # 2 hotspots found (only possible for dual hotspot approaches)
    elif found_bool[0] and found_bool[1]:
        print('Two potential hotspots were detected.')
        (found_network_type, shape_vector, network_type_certainty) = \
            pynibs.identify_network_type(found_idcs, scorematrix, e_subset, response_data)
        print(f'Identified network type: ({found_network_type}) for hotspots {found_idcs}')

    # no hotspot found -> pseudonetwork, type 1 (NO)
    else:
        print('No hotspot was found.')
        found_network_type = 1

    stop = time.time()
    runtime_detection = np.round(stop - start, 2)

    # plot response data on the plane spanned by both hotspot efields
    if found_bool[0] and found_bool[1]:
        if config['plot_std']:
            std_plt = pynibs.plot_data_std(response_data[0], e_subset[:, found_idcs[0]], e_subset[:, found_idcs[1]])
            fn_std_plt = os.path.join(base_path,
                                      f'plot_std_found_hotspots_{found_idcs[0]}_{found_idcs[1]}.png')  # nnn
            std_plt.savefig(fn_std_plt, dpi=600)
            std_plt.close()
        if config['plot_bin']:
            bin_plt = pynibs.plot_data_bin(response_data[1], e_subset[:, found_idcs[0]], e_subset[:, found_idcs[1]])
            fn_bin_plt = os.path.join(base_path,
                                      f'plot_bin_found_hotspots_{found_idcs[0]}_{found_idcs[1]}.png')  # nnn
            bin_plt.savefig(fn_bin_plt, dpi=600)
            bin_plt.close()
    # per-hotspot response-vs-efield curves (drawn for each found hotspot individually)
    if config['plot_curves']:
        if found_bool[0]:
            plot_idx0 = found_idcs[0]
            plt_curve = pynibs.plot_data_bin(np.zeros(response_data[1].shape), e_subset[:, plot_idx0], response_data[0])
            plt_curve.ylabel('response')
            plt_curve.savefig(f'{base_path}/plot_found_hotspot_{plot_idx0}_curve.png', dpi=600)
            plt_curve.close()
        if found_bool[1]:
            plot_idx1 = found_idcs[1]
            plt_curve1 = pynibs.plot_data_bin(np.zeros(response_data[1].shape), e_subset[:, plot_idx1], response_data[0])
            plt_curve1.ylabel('response')
            plt_curve1.xlabel('E-field $h_1')
            plt_curve1.savefig(f'{base_path}/plot_found_hotspot_{plot_idx1}_curve.png', dpi=600)
            plt_curve1.close()

    detection_result = found_network_type, found_idcs, found_acc, found_distance, found_scores, \
        network_type_certainty, shape_vector
    return runtime_detection, detection_result
1892
+
1893
+
1894
def write_nda_test_results_csv(runtimes, e_subset, response_data, hotspot_mask, detection_result, fn_geo, config):
    """
    Evaluate network detection test results. Found networks are compared to the used settings for data generation.
    Then all used parameters and evaluation results are saved in a CSV file. See output_documentation.md for
    details about the output file.

    Parameters
    ----------
    runtimes : np.ndarray of float
        (3) The runtimes in s for (generation, scoring, detection).
    e_subset : np.ndarray of float
        (n_zaps, n_elms) The efield magnitudes of the used coil positions across all ROI elements.
    response_data : np.ndarray of float
        (2, n_zaps) Two arrays, data[0] contains the response corresponding to each coil position, data[1] contains its
        binarization (1: response affected, 0: response not affected).
    hotspot_mask : np.ndarray
        (n_elms) Hotspot score of every element, derived from the scorematrix.
    detection_result : np.ndarray
        (7) contains the result of the detection, consisting of
        (found_network_type, found_idcs, found_acc, found_distance, found_scores, network_type_certainty, shape_vector).
    fn_geo : str
        Path to the geo.hdf5-file.
    config : dict
        A dictionary containing the configuration parameters.
    """
    print("Results are evaluated automatically.\n **** \n")

    # (1) read config params and results
    hotspot_idcs = (config['hotspot_elm0'], config['hotspot_elm1'])
    found_network_type, found_idcs, found_acc, found_distance, found_scores, \
        network_type_certainty, shape_vector = detection_result

    # (2) hotspot assignment to enable network identification evaluation

    # if only one hotspot found: assign the nearest real hotspot
    if found_network_type == 4 or found_network_type == 6:
        assignment = assign_found_hotspot_single(fn_geo, hotspot_idcs, found_idcs[0])
        if assignment[1] == 1:  # switch hotspot order if necessary (in case of network type 6)
            found_network_type = 6
            found_idcs[1] = found_idcs[0]
            found_idcs[0] = np.nan
            found_distance[1] = assignment[0]
            found_scores[1] = found_scores[0]
            found_scores[0] = np.nan
            found_acc[1] = found_acc[0]
            found_acc[0] = np.nan
        else:  # else keep order (in case of network type 4)
            found_network_type = 4
            found_distance[0] = assignment[0]

    # 2 hotspots found: assign hotspots so that total distances are minimized for more precise evaluation
    elif found_network_type != 1:
        assignment = pynibs.assign_found_hotspots(fn_geo, hotspot_idcs, found_idcs)
        found_distance = [np.round(assignment[0], 2), np.round(assignment[1], 2)]
        # switch hotspot order if reassignment necessary
        if found_idcs[1] == assignment[2][0]:
            found_scores_h = found_scores[0]
            found_scores[0] = found_scores[1]
            found_scores[1] = found_scores_h
            found_acc_h = found_acc[0]
            found_acc[0] = found_acc[1]
            found_acc[1] = found_acc_h
            found_idcs = assignment[2]
            # adjust type if asymmetric type
            if found_network_type == 3:
                found_network_type = 5
            elif found_network_type == 5:
                found_network_type = 3

    print(f'For Evaluation: Identified network type: ({found_network_type}) for hotspots {found_idcs}')

    # (3) evaluation
    # collect additional info for result evaluation
    real_hotspot_dist = np.round(pynibs.calc_dist_pairwise(fn_geo, hotspot_idcs[0], hotspot_idcs[1]), 2)
    real_hotspot_corr = np.round(pynibs.compute_correlation_with_all_elements(e_subset, hotspot_idcs[0])[hotspot_idcs[1]], 2)
    hotspot_0_emax = np.round(np.max(e_subset[:, hotspot_idcs[0]]), 3)
    hotspot_1_emax = np.round(np.max(e_subset[:, hotspot_idcs[1]]), 3)
    num_hotspot_candidates = np.count_nonzero(hotspot_mask)
    found_accuracy = (np.round(found_acc[0], 3), np.round(found_acc[1], 3))
    if found_network_type in [4, 6, 1]:
        found_hotspots_dist, found_hotspots_corr = np.nan, np.nan
    else:  # compute found hotspot distance and correlation for dual hotspot types
        found_hotspots_dist = np.round(pynibs.calc_dist_pairwise(fn_geo, found_idcs[0], found_idcs[1]), 2)
        found_hotspots_corr = np.round(
            pynibs.compute_correlation_with_all_elements(e_subset, found_idcs[0])[found_idcs[1]], 2)

    # evaluate network type identification: 1 means the network was correctly identified, 0 otherwise
    network_types = ['NO', 'AND', '1_INH_0', 'SH_0', '0_INH_1', 'SH_1', 'XOR', 'OR']
    real_network_type = network_types.index(config['network_type']) + 1
    identification_evaluation = pynibs.evaluate_network_identification(real_hotspot_dist, real_network_type, found_network_type)

    # evaluate hotspot localization: 1 means active hotspots were localized within 10mm, 0 otherwise
    if real_network_type == 1:
        localization_evaluation = identification_evaluation
    elif real_network_type == 4:
        localization_evaluation = int(found_distance[0] < 10)
    elif real_network_type == 6:
        localization_evaluation = int(found_distance[1] < 10)
    else:
        localization_evaluation = int((found_distance[0] < 10) and (found_distance[1] < 10))

    # save information about the response
    response_max = np.round(np.max(response_data[0]), 3)
    response_mean = np.round(np.average(response_data[0]), 3)
    response_dev = np.round(np.std(response_data[0]), 3)

    # save all parameters from config file
    values = config.values()
    excel_values = np.array(1)  # first entry always 1, just cause
    for val in values:
        excel_values = np.append(excel_values, val)
    vals1 = excel_values[:7]
    vals2 = excel_values[7:-5]  # the last 5 values are not saved (fn_results and booleans)

    # save data in one row in {fn_results}, structure: configuration params, detection results, evaluation, additional info
    # configuration parameters
    output_csv = np.append(vals1, real_network_type)
    output_csv = np.append(output_csv, vals2)
    # detection results
    output_csv = np.append(output_csv,
                           [found_idcs[0], found_idcs[1],
                            np.round(found_scores[0], 2), np.round(found_scores[1], 2),
                            found_accuracy[0], found_accuracy[1],
                            found_hotspots_corr, found_hotspots_dist,
                            found_network_type, np.round(network_type_certainty, 2)])
    output_csv = np.append(output_csv, shape_vector)
    # evaluation results (only in case of artificial data / testing reasons)
    output_csv = np.append(output_csv,
                           [identification_evaluation, localization_evaluation,
                            real_hotspot_dist, real_hotspot_corr,
                            hotspot_0_emax, hotspot_1_emax,
                            found_distance[0], found_distance[1],
                            num_hotspot_candidates])
    # additional information and meta-data
    output_csv = np.append(output_csv,
                           [response_max, response_mean, response_dev,
                            runtimes[0], runtimes[1], runtimes[2]])

    # write csv artificial data; retry while the file is locked by another process
    while True:
        fn_results = config['fn_results']
        try:
            # open the evaluation csv file in the write mode
            with open(f'/data/pt_01756/studies/network_mapping/testing_NDA/15484.08/{fn_results}.csv', 'a',  # fff
                      newline='', encoding='UTF8') as f:
                # create the csv writer
                writer = csv.writer(f)
                # append output row
                writer.writerow(output_csv)
            break
        # Fix: only retry on I/O errors. The previous bare 'except:' swallowed
        # every exception (including KeyboardInterrupt) and could loop forever
        # on programming errors; now those propagate.
        except OSError:
            print('problem accessing eval csv')
            time.sleep(1)

    print(f'Saved results and evaluation in {fn_results}.csv \n **** \n ')
2052
+
2053
+
2054
def write_nda_application_results_csv(runtimes, e_subset, response_data, hotspot_mask, detection_result, fn_geo, config):
    """
    Writes network detection results to a CSV file based on the provided parameters and configuration for real data.
    See output_documentation.md for more.

    Parameters
    ----------
    runtimes : np.ndarray of float
        (3), The runtimes in s for (generation, scoring, detection).
    e_subset : np.ndarray of float
        (n_zaps, n_elms) The efield magnitudes of the used coil positions across all ROI elements.
    response_data : np.ndarray of float
        (2, n_zaps) Two arrays, data[0] contains the response corresponding to each coil position, data[1] contains its
        binarization (1: response affected, 0: response not affected).
    hotspot_mask : np.ndarray
        (n_elms) Hotspot score of every element, derived from the scorematrix.
    detection_result : np.ndarray
        (7) contains the result of the detection, consisting of
        (found_network_type, found_idcs, found_acc, found_distance, found_scores, network_type_certainty, shape_vector).
    fn_geo : str
        Path to the geo.hdf5-file.
    config : dict
        A dictionary containing the configuration parameters.

    Returns
    -------
    found_network_name : str
        Name of the identified network type (e.g. 'SH_0', 'XOR').
    found_idcs : list
        Indices of the found hotspots.
    """
    found_network_type, found_idcs, found_acc, found_distance, found_scores, \
        network_type_certainty, shape_vector = detection_result

    # collect additional info for result evaluation
    sample_size = e_subset.shape[0]
    found_accuracy = (np.round(found_acc[0], 3), np.round(found_acc[1], 3))
    # translate real network type id name into network name
    network_types = ['NO', 'AND', '1_INH_0', 'SH_0', '0_INH_1', 'SH_1', 'XOR', 'OR']
    found_network_name = network_types[found_network_type - 1]
    if found_network_type in [4, 6, 1]:
        found_hotspots_dist, found_hotspots_corr = np.nan, np.nan
    else:  # compute found hotspot distance and correlation for dual hotspot types
        found_hotspots_dist = np.round(pynibs.calc_dist_pairwise(fn_geo, found_idcs[0], found_idcs[1]), 2)
        found_hotspots_corr = np.round(
            pynibs.compute_correlation_with_all_elements(e_subset, found_idcs[0])[found_idcs[1]], 2)

    # read relevant parameters from config file
    values = config.values()
    excel_values = np.array(1)  # hardcoded first entry in the result file
    for val in values:
        excel_values = np.append(excel_values, val)
    vals = excel_values[:-8]  # last entries not needed for evaluation

    # collect information about the response
    response_max = np.round(np.max(response_data[0]), 3)
    response_mean = np.round(np.average(response_data[0]), 3)
    response_dev = np.round(np.std(response_data[0]), 3)

    # save data in one row in {fn_results}, structure: config parameters, detection results, evaluation, additional info
    # configuration parameters
    output_csv = vals
    # detection results
    output_csv = np.append(output_csv,
                           [found_idcs[0], found_idcs[1],
                            np.round(found_scores[0], 2), np.round(found_scores[1], 2),
                            found_accuracy[0], found_accuracy[1],
                            found_hotspots_corr, found_hotspots_dist,
                            found_network_type, np.round(network_type_certainty, 2)])
    output_csv = np.append(output_csv, shape_vector)
    # additional information and meta-data
    output_csv = np.append(output_csv,
                           [sample_size, response_max, response_mean, response_dev,
                            runtimes[0], runtimes[1], runtimes[2]])

    # write csv real data; retry while the file is locked by another process
    while True:
        fn_results = config['fn_results']
        try:
            # open the evaluation csv file in write mode
            with open(f'/data/pt_01756/studies/network_mapping/evaluation_files_realdata/{fn_results}.csv', 'a', newline='',
                      encoding='UTF8') as f:
                # create the csv writer
                writer = csv.writer(f)
                # append output row
                writer.writerow(output_csv)
            break
        # Fix: only retry on I/O errors. The previous bare 'except:' swallowed
        # every exception (including KeyboardInterrupt) and could loop forever
        # on programming errors; now those propagate.
        except OSError:
            print('problem accessing result csv')
            time.sleep(1)
    if VERBOSE:
        print(f'Found hotspots: {found_idcs}. \n'
              f'Saved results and evaluation in {fn_results} \n **** \n ')

    return found_network_name, found_idcs
2142
+
2143
+
2144
def network_detection_algorithm_testing(e_matrix, roi_surf, fn_geo, base_path, config):
    """
    Run the network detection algorithm on artificially generated TMS data for testing.

    The e-field values are not generated but needed as input; the response values are
    computed from the chosen network and the e-field values. A comparison between the
    algorithm output and the input settings is saved to a .csv-file and can be used to
    assess the algorithm's performance.

    Parameters
    ----------
    e_matrix : np.ndarray of float
        (n_zaps, n_elms) The efield magnitudes of all available coil positions across all ROI elements.
    roi_surf : ROI obj
        ROI surface object
    fn_geo : str
        Path to the geo.hdf5-file.
    base_path : str
        Path to the folder where results should end up.
    config : dict
        YAML configuration file content as a dictionary.
    """
    # runtimes per pipeline stage: [generation, scoring, detection]
    timing = np.zeros(shape=3)

    # (0)+(1): generate artificial responses for the given e-fields and binarize them
    e_subset, response_data = pynibs.write_network_detection_data_hdf5(
        'artificial', e_matrix, None, base_path, config)

    # (2): score every ROI element combination
    timing[1], score_mat, mask = pynibs.write_hotspot_scoremap_hdf5(
        'artificial', e_subset, response_data, roi_surf, fn_geo, base_path, config)

    # (3): localize hotspots and identify the network type
    timing[2], result = pynibs.whole_network_detection(
        e_subset, response_data, score_mat, mask, base_path, config)

    # compare the detection against the generating settings and append the evaluation row
    pynibs.write_nda_test_results_csv(timing, e_subset, response_data, mask, result, fn_geo, config)
2175
+
2176
+
2177
def network_detection_algorithm_application(e_matrix, response_values, roi_surf, fn_geo, base_path, config):
    """
    Apply the network detection algorithm to measured data.

    Based on the e-field values and response values provided, a dual node network is
    identified (type (1)-(8), for further explanation see config file) and active network
    nodes (hotspots) are localized (specified by the element IDs). The results are saved
    in a .csv-file specified in the config file. An "effect map" of the ROI can be created,
    showing where to expect the highest stimulation effects.

    Parameters
    ----------
    e_matrix : np.ndarray of float
        (n_zaps, n_elms) The efield magnitudes of all available coil positions across all ROI elements.
    response_values : np.ndarray
        (n_zaps) Response values (MEPs, reaction times, ...) corresponding to the coil configurations.
    roi_surf : ROI obj
        ROI surface object
    fn_geo : str
        Path to the geo.hdf5-file.
    base_path : str
        Path to the folder where results should end up.
    config : dict
        YAML configuration file content as a dictionary.
    """
    # Running times of the three algorithm steps. Index 0 is not reported by the
    # data-writing helper and therefore stays 0.
    step_durations = np.zeros(shape=3)

    # Step (1): binarize the measured response data.
    e_subset, response = pynibs.write_network_detection_data_hdf5(
        'real', e_matrix, response_values, base_path, config)

    # Step (2): compute the hotspot score map over the ROI.
    step_durations[1], scorematrix, hotspot_mask = pynibs.write_hotspot_scoremap_hdf5(
        'real', e_subset, response, roi_surf, fn_geo, base_path, config)

    # Step (3): localize hotspots and identify the network type.
    step_durations[2], detection_result = pynibs.whole_network_detection(
        e_subset, response, scorematrix, hotspot_mask, base_path, config)

    # Persist detection results as one csv row; optionally write the ROI effect map.
    pynibs.write_nda_application_results_csv(
        step_durations, e_subset, response, hotspot_mask, detection_result, fn_geo, config)
    if config['write_effect_map']:
        pynibs.write_effect_map_hdf5('real', e_matrix, roi_surf, detection_result, base_path, config)
2214
+
2215
+ #
2216
+ # ARCHIVE
2217
+ #
2218
+ '''
2219
+ *******************************************
2220
+ corr_coeff_map:
2221
+ *******************************************
2222
+
2223
+ # just for testing reasons
2224
+ def write_correlation_coeff_1elm_hdf5(elmidx, roi_surf, config, base_path):
2225
+ print("Correlation coefficients are being calculated.")
2226
+ jitter = (config['jitter_ratio'], config['jitter_scale'])
2227
+ rn_seed = config['rn_seed']
2228
+ hotspot_idcs = (config['hotspot_elm0'], config['hotspot_elm1'])
2229
+ num_coil_samples = config['sample_size']
2230
+
2231
+ # load efields
2232
+ fn_data = f'data_jitter_{jitter}_hotspots_{hotspot_idcs}_seed_{rn_seed}.hdf5' # nnn
2233
+ with h5py.File(f'{base_path}/{fn_data}', "r") as f:
2234
+ e_subset = np.array(f['e_subset'])
2235
+
2236
+ # output naming
2237
+ fn_out_roi_scoremap = f"{base_path}/TEST_correlation_coeff_elm_{elmidx}_seed_{rn_seed}_data.hdf5"
2238
+ fn_out_roi_geo = f"{base_path}/TEST_correlation_coeff_elm_{elmidx}_seed_{rn_seed}_geo.hdf5"
2239
+
2240
+ corr_coeff = pynibs.compute_correlation_with_all_elements(e_subset, elmidx)
2241
+
2242
+ # save data as hdf5 _geo file (mapped)
2243
+ print(" > Creating .hdf5 geo files (mapped brain and roi) ...")
2244
+ pynibs.write_geo_hdf5_surf(out_fn=fn_out_roi_geo,
2245
+ points=roi_surf.node_coord_mid,
2246
+ con=roi_surf.node_number_list,
2247
+ replace=True,
2248
+ hdf5_path='/mesh')
2249
+ pynibs.write_data_hdf5_surf(data=corr_coeff,
2250
+ data_names=['correlation_coeff'],
2251
+ data_hdf_fn_out=fn_out_roi_scoremap,
2252
+ geo_hdf_fn=fn_out_roi_geo,
2253
+ replace=True)
2254
+
2255
+ print(f'Saved in folder: {base_path} \n **** \n ')
2256
+
2257
+
2258
+
2259
+
2260
+ *******************************************
2261
+ all shape map functions
2262
+ *******************************************
2263
+
2264
+
2265
+ #
2266
+ # Computes the classifier (DecisionTree) score of each combination of ROI elements.
2267
+ # (upper triangle matrix of the score matrix)
2268
+ #
2269
+ # @param efields ... the efields in the ROI of all (investigated) coil positions
2270
+ # shape: #coil pos x #roi elements
2271
+ # @param data ... the (binarized) MEPs elicited by each coil position
2272
+ # (indices in 0-axis must correspond to the 0-axis of 'efields')
2273
+ # @params weights ... weight the data points using these weights
2274
+ # (indices in 0-axis must correspond to the 0-axis of 'efields'
2275
+ # and 'data')
2276
+ # @return (np.ndarray) ... a upper triangle matrix containing the scores of each ROI
2277
+ # element with all others
2278
+ #
2279
+ def compute_scores_and_shape_with_all_elements(efields, data, weights=None, masked_idcs=None):
2280
+ dim = efields.shape[1]
2281
+ scores = np.zeros((dim,dim))
2282
+ shape = np.zeros((dim,dim))
2283
+
2284
+ for j in range(dim):
2285
+ element_idx, scores_row, shape_row = pynibs.compute_scores_and_shape_with_single_element(j,efields, data, weights, masked_idcs)
2286
+ scores[element_idx] = scores_row
2287
+ shape[element_idx] = shape_row
2288
+
2289
+ return scores, shape
2290
+
2291
+
2292
+ #
2293
+ # Computes the classifier (DecisionTree) score of each combination of ROI elements.
2294
+ # (multi-core enabled, upper triangle matrix of the score matrix)
2295
+ #
2296
+ # @param efields ... the efields in the ROI of all (investigated) coil positions
2297
+ # shape: #coil pos x #roi elements
2298
+ # @param data ... the (binarized) MEPs elicited by each coil position
2299
+ # (indices in 0-axis must correspond to the 0-axis of 'efields')
2300
+ # @params weights ... weight the data points using these weights
2301
+ # (indices in 0-axis must correspond to the 0-axis of 'efields'
2302
+ # and 'data')
2303
+ # @return (np.ndarray) ... an upper triangle matrix containing the scores of each ROI element with all others
2304
+ #
2305
+ def compute_scores_and_shape_with_all_elements_MP(efields, data, weights=None, masked_idcs=None):
2306
+ dim = efields.shape[1]
2307
+ scores = np.zeros((dim,dim))
2308
+ shape = np.zeros((dim,dim))
2309
+
2310
+ elmts_to_iterate_over = masked_idcs if masked_idcs is not None else range(dim)
2311
+
2312
+ num_processes=multiprocessing.cpu_count()
2313
+ with multiprocessing.Pool(processes=num_processes) as pool:
2314
+ # @TODO: monitor if chunksize=1 impairs performance; it was set to 1 to have a smoother tdqm-progress bar.
2315
+ mp_res = pool.starmap(
2316
+ pynibs.compute_scores_and_shape_with_single_element,
2317
+ tqdm(
2318
+ [(j,efields,data,weights,masked_idcs) for j in elmts_to_iterate_over],
2319
+ total=len(elmts_to_iterate_over)
2320
+ ),
2321
+ chunksize=1
2322
+ )
2323
+ pool.close()
2324
+ pool.join()
2325
+
2326
+ for res in mp_res:
2327
+ scores[res[0]] = res[1]
2328
+ shape[res[0]] = res[2]
2329
+
2330
+ return scores, shape
2331
+
2332
+
2333
+
2334
+ #
2335
+ # Computes the classifier (DecisionTree) score of element 'element_idx' with
2336
+ # all other elements within the ROI.
2337
+ #
2338
+ # @param element_idx ... the index of the element whose score with all other elements
2339
+ # should be computed
2340
+ # @param efields ... the efields in the ROI of all (investigated) coil positions
2341
+ # shape: #coil pos x #roi elements
2342
+ # @param data ... the (binarized) MEPs elicited by each coil position
2343
+ # (indices in 0-axis must correspond to the 0-axis of 'efields')
2344
+ # @params weights ... weight the data points using these weights
2345
+ # (indices in 0-axis must correspond to the 0-axis of 'efields'
2346
+ # and 'data')
2347
+ # @return (tuple) ... a tuple with the idx of the investigated element + an array containing
2348
+ # the scores of this element with each other array element
2349
+ # slice(None) equivalent to the colon operator ':'
2350
+ def compute_scores_and_shape_with_single_element(element_idx, efields, data, weights=None, masked_idcs=None):
2351
+ dim = efields.shape[1]
2352
+ scores = np.zeros(dim)
2353
+ shape = np.zeros(dim)
2354
+
2355
+ clf = tree.DecisionTreeClassifier(max_depth=2)
2356
+
2357
+ elmts_to_iterate_over = masked_idcs if masked_idcs is not None else range(dim)
2358
+
2359
+ for i in elmts_to_iterate_over:
2360
+ if i > element_idx:
2361
+ stacked_efields = np.vstack((efields[:, element_idx], efields[:, i])).transpose()
2362
+ clf.fit(stacked_efields, data, sample_weight=weights)
2363
+ scores[i] = clf.score(stacked_efields, data)
2364
+ if np.array_equal(clf.tree_.feature, [0, 0, -2, -2, 1, -2, -2]):
2365
+ shape[i] = 2
2366
+ else:
2367
+ shape[i] = 0
2368
+ #scores[i] = my_score(clf, stacked_efields, data)
2369
+
2370
+ return (element_idx, scores, shape)
2371
+ # return scores
2372
+
2373
+
2374
+
2375
+ '''