py-neuromodulation 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. py_neuromodulation/ConnectivityDecoding/_get_grid_hull.m +34 -34
  2. py_neuromodulation/ConnectivityDecoding/_get_grid_whole_brain.py +95 -106
  3. py_neuromodulation/ConnectivityDecoding/_helper_write_connectome.py +107 -119
  4. py_neuromodulation/__init__.py +80 -13
  5. py_neuromodulation/{nm_RMAP.py → analysis/RMAP.py} +496 -531
  6. py_neuromodulation/analysis/__init__.py +4 -0
  7. py_neuromodulation/{nm_decode.py → analysis/decode.py} +918 -992
  8. py_neuromodulation/{nm_analysis.py → analysis/feature_reader.py} +994 -1074
  9. py_neuromodulation/{nm_plots.py → analysis/plots.py} +627 -612
  10. py_neuromodulation/{nm_stats.py → analysis/stats.py} +458 -480
  11. py_neuromodulation/data/README +6 -6
  12. py_neuromodulation/data/dataset_description.json +8 -8
  13. py_neuromodulation/data/participants.json +32 -32
  14. py_neuromodulation/data/participants.tsv +2 -2
  15. py_neuromodulation/data/sub-testsub/ses-EphysMedOff/ieeg/sub-testsub_ses-EphysMedOff_space-mni_coordsystem.json +5 -5
  16. py_neuromodulation/data/sub-testsub/ses-EphysMedOff/ieeg/sub-testsub_ses-EphysMedOff_space-mni_electrodes.tsv +11 -11
  17. py_neuromodulation/data/sub-testsub/ses-EphysMedOff/ieeg/sub-testsub_ses-EphysMedOff_task-gripforce_run-0_channels.tsv +11 -11
  18. py_neuromodulation/data/sub-testsub/ses-EphysMedOff/ieeg/sub-testsub_ses-EphysMedOff_task-gripforce_run-0_ieeg.json +18 -18
  19. py_neuromodulation/data/sub-testsub/ses-EphysMedOff/ieeg/sub-testsub_ses-EphysMedOff_task-gripforce_run-0_ieeg.vhdr +35 -35
  20. py_neuromodulation/data/sub-testsub/ses-EphysMedOff/ieeg/sub-testsub_ses-EphysMedOff_task-gripforce_run-0_ieeg.vmrk +13 -13
  21. py_neuromodulation/data/sub-testsub/ses-EphysMedOff/sub-testsub_ses-EphysMedOff_scans.tsv +2 -2
  22. py_neuromodulation/default_settings.yaml +241 -0
  23. py_neuromodulation/features/__init__.py +31 -0
  24. py_neuromodulation/features/bandpower.py +165 -0
  25. py_neuromodulation/features/bispectra.py +157 -0
  26. py_neuromodulation/features/bursts.py +297 -0
  27. py_neuromodulation/features/coherence.py +255 -0
  28. py_neuromodulation/features/feature_processor.py +121 -0
  29. py_neuromodulation/features/fooof.py +142 -0
  30. py_neuromodulation/features/hjorth_raw.py +57 -0
  31. py_neuromodulation/features/linelength.py +21 -0
  32. py_neuromodulation/features/mne_connectivity.py +148 -0
  33. py_neuromodulation/features/nolds.py +94 -0
  34. py_neuromodulation/features/oscillatory.py +249 -0
  35. py_neuromodulation/features/sharpwaves.py +432 -0
  36. py_neuromodulation/filter/__init__.py +3 -0
  37. py_neuromodulation/filter/kalman_filter.py +67 -0
  38. py_neuromodulation/filter/kalman_filter_external.py +1890 -0
  39. py_neuromodulation/filter/mne_filter.py +128 -0
  40. py_neuromodulation/filter/notch_filter.py +93 -0
  41. py_neuromodulation/grid_cortex.tsv +40 -40
  42. py_neuromodulation/liblsl/libpugixml.so.1.12 +0 -0
  43. py_neuromodulation/liblsl/linux/bionic_amd64/liblsl.1.16.2.so +0 -0
  44. py_neuromodulation/liblsl/linux/bookworm_amd64/liblsl.1.16.2.so +0 -0
  45. py_neuromodulation/liblsl/linux/focal_amd46/liblsl.1.16.2.so +0 -0
  46. py_neuromodulation/liblsl/linux/jammy_amd64/liblsl.1.16.2.so +0 -0
  47. py_neuromodulation/liblsl/linux/jammy_x86/liblsl.1.16.2.so +0 -0
  48. py_neuromodulation/liblsl/linux/noble_amd64/liblsl.1.16.2.so +0 -0
  49. py_neuromodulation/liblsl/macos/amd64/liblsl.1.16.2.dylib +0 -0
  50. py_neuromodulation/liblsl/macos/arm64/liblsl.1.16.0.dylib +0 -0
  51. py_neuromodulation/liblsl/windows/amd64/liblsl.1.16.2.dll +0 -0
  52. py_neuromodulation/liblsl/windows/x86/liblsl.1.16.2.dll +0 -0
  53. py_neuromodulation/processing/__init__.py +10 -0
  54. py_neuromodulation/{nm_artifacts.py → processing/artifacts.py} +29 -25
  55. py_neuromodulation/processing/data_preprocessor.py +77 -0
  56. py_neuromodulation/processing/filter_preprocessing.py +78 -0
  57. py_neuromodulation/processing/normalization.py +175 -0
  58. py_neuromodulation/{nm_projection.py → processing/projection.py} +370 -394
  59. py_neuromodulation/{nm_rereference.py → processing/rereference.py} +97 -95
  60. py_neuromodulation/{nm_resample.py → processing/resample.py} +56 -50
  61. py_neuromodulation/stream/__init__.py +3 -0
  62. py_neuromodulation/stream/data_processor.py +325 -0
  63. py_neuromodulation/stream/generator.py +53 -0
  64. py_neuromodulation/stream/mnelsl_player.py +94 -0
  65. py_neuromodulation/stream/mnelsl_stream.py +120 -0
  66. py_neuromodulation/stream/settings.py +292 -0
  67. py_neuromodulation/stream/stream.py +427 -0
  68. py_neuromodulation/utils/__init__.py +2 -0
  69. py_neuromodulation/{nm_define_nmchannels.py → utils/channels.py} +305 -302
  70. py_neuromodulation/utils/database.py +149 -0
  71. py_neuromodulation/utils/io.py +378 -0
  72. py_neuromodulation/utils/keyboard.py +52 -0
  73. py_neuromodulation/utils/logging.py +66 -0
  74. py_neuromodulation/utils/types.py +251 -0
  75. {py_neuromodulation-0.0.4.dist-info → py_neuromodulation-0.0.6.dist-info}/METADATA +28 -33
  76. py_neuromodulation-0.0.6.dist-info/RECORD +89 -0
  77. {py_neuromodulation-0.0.4.dist-info → py_neuromodulation-0.0.6.dist-info}/WHEEL +1 -1
  78. {py_neuromodulation-0.0.4.dist-info → py_neuromodulation-0.0.6.dist-info}/licenses/LICENSE +21 -21
  79. py_neuromodulation/FieldTrip.py +0 -589
  80. py_neuromodulation/_write_example_dataset_helper.py +0 -65
  81. py_neuromodulation/nm_EpochStream.py +0 -92
  82. py_neuromodulation/nm_IO.py +0 -417
  83. py_neuromodulation/nm_across_patient_decoding.py +0 -927
  84. py_neuromodulation/nm_bispectra.py +0 -168
  85. py_neuromodulation/nm_bursts.py +0 -198
  86. py_neuromodulation/nm_coherence.py +0 -205
  87. py_neuromodulation/nm_cohortwrapper.py +0 -435
  88. py_neuromodulation/nm_eval_timing.py +0 -239
  89. py_neuromodulation/nm_features.py +0 -116
  90. py_neuromodulation/nm_features_abc.py +0 -39
  91. py_neuromodulation/nm_filter.py +0 -219
  92. py_neuromodulation/nm_filter_preprocessing.py +0 -91
  93. py_neuromodulation/nm_fooof.py +0 -159
  94. py_neuromodulation/nm_generator.py +0 -37
  95. py_neuromodulation/nm_hjorth_raw.py +0 -73
  96. py_neuromodulation/nm_kalmanfilter.py +0 -58
  97. py_neuromodulation/nm_linelength.py +0 -33
  98. py_neuromodulation/nm_mne_connectivity.py +0 -112
  99. py_neuromodulation/nm_nolds.py +0 -93
  100. py_neuromodulation/nm_normalization.py +0 -214
  101. py_neuromodulation/nm_oscillatory.py +0 -448
  102. py_neuromodulation/nm_run_analysis.py +0 -435
  103. py_neuromodulation/nm_settings.json +0 -338
  104. py_neuromodulation/nm_settings.py +0 -68
  105. py_neuromodulation/nm_sharpwaves.py +0 -401
  106. py_neuromodulation/nm_stream_abc.py +0 -218
  107. py_neuromodulation/nm_stream_offline.py +0 -359
  108. py_neuromodulation/utils/_logging.py +0 -24
  109. py_neuromodulation-0.0.4.dist-info/RECORD +0 -72
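The largest change in this release is the decoder module (entry 7 above, nm_decode.py → analysis/decode.py), whose diff follows. Going by the renames listed above, downstream imports move from flat nm_-prefixed modules to subpackages. A minimal migration sketch, assuming the new module paths mirror the file renames one-to-one (the subpackage __init__.py re-exports are not shown in this diff):

    # 0.0.4: flat, nm_-prefixed module
    from py_neuromodulation.nm_decode import Decoder

    # 0.0.6: the same class now lives in the analysis subpackage
    from py_neuromodulation.analysis.decode import Decoder

    decoder = Decoder()  # per the diff below, defaults to LinearRegression and r2_score

A second pattern visible in the new code below: heavy optional dependencies (imblearn, mrmr, scipy.ndimage, parts of sklearn) are no longer imported at module top level but lazily inside the methods that use them, so importing the decoder no longer requires them all to be installed.

py_neuromodulation/{nm_decode.py → analysis/decode.py}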
@@ -1,992 +1,918 @@
- from sklearn import (
-     model_selection,
-     metrics,
-     linear_model,
-     discriminant_analysis,
-     base,
-     decomposition,
-     cross_decomposition,
- )
- from skopt.space import Real, Integer, Categorical
- from skopt.utils import use_named_args
- from skopt import gp_minimize, Optimizer
- from sklearn.linear_model import ElasticNet
- from sklearn.base import clone
- from sklearn.utils import class_weight
- from scipy.ndimage import binary_dilation, binary_erosion
- from scipy.ndimage import label as label_ndimage
- from imblearn.over_sampling import RandomOverSampler
- from imblearn.under_sampling import RandomUnderSampler
- import pandas as pd
- import os
- import json
- import numpy as np
- #from numba import jit
- #import xgboost
- from copy import deepcopy
-
- from mrmr import mrmr_classif
- from typing import Type
- import _pickle as cPickle
-
-
- class CV_res:
-     def __init__(
-         self,
-         get_movement_detection_rate: bool = False,
-         RUN_BAY_OPT: bool = False,
-         mrmr_select: bool = False,
-         model_save: bool = False,
-     ) -> None:
-
-         self.score_train = []
-         self.score_test = []
-         self.y_test = []
-         self.y_train = []
-         self.y_test_pr = []
-         self.y_train_pr = []
-         self.X_test = []
-         self.X_train = []
-         self.coef = []
-         if get_movement_detection_rate is True:
-             self.mov_detection_rates_test = []
-             self.tprate_test = []
-             self.fprate_test = []
-             self.mov_detection_rates_train = []
-             self.tprate_train = []
-             self.fprate_train = []
-         if RUN_BAY_OPT is True:
-             self.best_bay_opt_params = []
-         if mrmr_select is True:
-             self.mrmr_select = []
-         if model_save is True:
-             self.model_save = []
-
-
- class Decoder:
-
-     features: pd.DataFrame
-     label: np.ndarray
-     model: base.BaseEstimator
-     cv_method: model_selection.BaseCrossValidator
-     use_nested_cv: bool
-     threshold_score: bool
-     mov_detection_threshold: float
-     TRAIN_VAL_SPLIT: bool
-     RUN_BAY_OPT: bool
-     save_coef: bool
-     get_movement_detection_rate: bool
-     min_consequent_count: int
-     STACK_FEATURES_N_SAMPLES: bool
-     time_stack_n_samples: int
-     ros: RandomOverSampler = None
-     rus: RandomUnderSampler = None
-     VERBOSE: bool = False
-     ch_ind_data: dict = {}
-     grid_point_ind_data: dict = {}
-     active_gridpoints: list = []
-     feature_names: list = []
-     ch_ind_results: dict = {}
-     gridpoint_ind_results: dict = {}
-     all_ch_results: dict = {}
-
-     class ClassMissingException(Exception):
-         def __init__(
-             self,
-             message="Only one class present.",
-         ) -> None:
-             self.message = message
-             super().__init__(self.message)
-
-         def __str__(self):
-             print(self.message)
-
-     def __init__(
-         self,
-         features: pd.DataFrame = None,
-         label: np.ndarray = None,
-         label_name: str = None,
-         used_chs: list[str] = None,
-         model=linear_model.LinearRegression(),
-         eval_method=metrics.r2_score,
-         cv_method=model_selection.KFold(n_splits=3, shuffle=False),
-         use_nested_cv: bool = False,
-         threshold_score=True,
-         mov_detection_threshold: float = 0.5,
-         TRAIN_VAL_SPLIT: bool = False,
-         RUN_BAY_OPT: bool = False,
-         STACK_FEATURES_N_SAMPLES: bool = False,
-         time_stack_n_samples: int = 5,
-         save_coef: bool = False,
-         get_movement_detection_rate: bool = False,
-         min_consequent_count: int = 3,
-         bay_opt_param_space: list = [],
-         VERBOSE: bool = False,
-         sfreq: int = None,
-         undersampling: bool = False,
-         oversampling: bool = False,
-         mrmr_select: bool = False,
-         pca: bool = False,
-         cca: bool = False,
-         model_save: bool = False,
-     ) -> None:
-         """Initialize here a feature file for processing
-         Read settings.json nm_channels.csv and features.csv
-         Read target label
-
-         Parameters
-         ----------
-         model : machine learning model
-             model that utilizes fit and predict functions
-         eval_method : sklearn metrics
-             evaluation scoring method
-         cv_method : sklearm model_selection method
-         threshold_score : boolean
-             if True set lower threshold at zero (useful for r2),
-         mov_detection_threshold : float
-             if get_movement_detection_rate is True, find given minimum 'threshold' respective
-             consecutive movement blocks, by default 0.5
-         TRAIN_VAL_SPLIT (boolean):
-             if true split data into additinal validation, and run class weighted CV
-         save_coef (boolean):
-             if true, save model._coef trained coefficients
-         get_movement_detection_rate (boolean):
-             save detection rate and tpr / fpr as well
-         min_consequent_count (int):
-             if get_movement_detection_rate is True, find given 'min_consequent_count' respective
-             consecutive movement blocks with minimum size of 'min_consequent_count'
-         """
-
-         self.model = model
-         self.eval_method = eval_method
-         self.cv_method = cv_method
-         self.use_nested_cv = use_nested_cv
-         self.threshold_score = threshold_score
-         self.mov_detection_threshold = mov_detection_threshold
-         self.TRAIN_VAL_SPLIT = TRAIN_VAL_SPLIT
-         self.RUN_BAY_OPT = RUN_BAY_OPT
-         self.save_coef = save_coef
-         self.sfreq = sfreq
-         self.get_movement_detection_rate = get_movement_detection_rate
-         self.min_consequent_count = min_consequent_count
-         self.STACK_FEATURES_N_SAMPLES = STACK_FEATURES_N_SAMPLES
-         self.time_stack_n_samples = time_stack_n_samples
-         self.bay_opt_param_space = bay_opt_param_space
-         self.VERBOSE = VERBOSE
-         self.undersampling = undersampling
-         self.oversampling = oversampling
-         self.mrmr_select = mrmr_select
-         self.used_chs = used_chs
-         self.label = label
-         self.label_name = label_name
-         self.cca = cca
-         self.pca = pca
-         self.model_save = model_save
-
-         self.set_data(features)
-
-         self.ch_ind_data = {}
-         self.grid_point_ind_data = {}
-         self.active_gridpoints = []
-         self.feature_names = []
-         self.ch_ind_results = {}
-         self.gridpoint_ind_results = {}
-         self.all_ch_results = {}
-         self.columns_names_single_ch = None
-
-         if undersampling:
-             self.rus = RandomUnderSampler(random_state=0)
-
-         if oversampling:
-             self.ros = RandomOverSampler(random_state=0)
-
-     def set_data(self, features):
-
-         if features is not None:
-             self.features = features
-             self.feature_names = [
-                 col
-                 for col in self.features.columns
-                 if not (("time" in col) or (self.label_name in col))
-             ]
-             self.data = np.nan_to_num(
-                 np.array(self.features[self.feature_names])
-             )
-
-     def set_data_ind_channels(self):
-         """specified channel individual data"""
-         self.ch_ind_data = {}
-         for ch in self.used_chs:
-             self.ch_ind_data[ch] = np.nan_to_num(
-                 np.array(
-                     self.features[
-                         [
-                             col
-                             for col in self.features.columns
-                             if col.startswith(ch)
-                         ]
-                     ]
-                 )
-             )
-
-     def set_CV_results(self, attr_name, contact_point=None):
-         """set CV results in respectie nm_decode attributes
-         The reference is first stored in obj_set, and the used lateron
-
-         Parameters
-         ----------
-         attr_name : string
-             is either all_ch_results, ch_ind_results, gridpoint_ind_results
-         contact_point : object, optional
-             usually an int specifying the grid_point or string, specifying the used channel,
-             by default None
-         """
-         if contact_point is not None:
-             getattr(self, attr_name)[contact_point] = {}
-             obj_set = getattr(self, attr_name)[contact_point]
-         else:
-             obj_set = getattr(self, attr_name)
-
-         def set_scores(cv_res: Type[CV_res], set_inner_CV_res: bool = False):
-             """
-             This function renames the CV_res keys for InnerCV
-             """
-
-             def set_score(key_: str, val):
-                 if set_inner_CV_res is True:
-                     key_ = "InnerCV_" + key_
-                 obj_set[key_] = val
-
-             set_score("score_train", cv_res.score_train)
-             set_score("score_test", cv_res.score_test)
-             set_score("y_test", cv_res.y_test)
-             set_score("y_train", cv_res.y_train)
-             set_score("y_test_pr", cv_res.y_test_pr)
-             set_score("y_train_pr", cv_res.y_train_pr)
-             set_score("X_train", cv_res.X_train)
-             set_score("X_test", cv_res.X_test)
-
-             if self.save_coef:
-                 set_score("coef", cv_res.coef)
-             if self.get_movement_detection_rate:
-                 set_score(
-                     "mov_detection_rates_test", cv_res.mov_detection_rates_test
-                 )
-                 set_score(
-                     "mov_detection_rates_train",
-                     cv_res.mov_detection_rates_train,
-                 )
-                 set_score("fprate_test", cv_res.fprate_test)
-                 set_score("fprate_train", cv_res.fprate_train)
-                 set_score("tprate_test", cv_res.tprate_test)
-                 set_score("tprate_train", cv_res.tprate_train)
-
-             if self.RUN_BAY_OPT is True:
-                 set_score("best_bay_opt_params", cv_res.best_bay_opt_params)
-
-             if self.mrmr_select is True:
-                 set_score("mrmr_select", cv_res.mrmr_select)
-             if self.model_save is True:
-                 set_score("model_save", cv_res.model_save)
-             return obj_set
-
-         obj_set = set_scores(self.cv_res)
-
-         if self.use_nested_cv is True:
-             obj_set = set_scores(self.cv_res_inner, set_inner_CV_res=True)
-
-     def run_CV_caller(self, feature_contacts: str = "ind_channels"):
-         """Wrapper that call for all channels / grid points / combined channels the CV function
-
-         Parameters
-         ----------
-         feature_contacts : str, optional
-             "grid_points", "ind_channels" or "all_channels_combined" , by default "ind_channels"
-         """
-         valid_feature_contacts = [
-             "ind_channels",
-             "all_channels_combined",
-             "grid_points",
-         ]
-         if feature_contacts not in valid_feature_contacts:
-             raise ValueError(
-                 f"{feature_contacts} not in {valid_feature_contacts}"
-             )
-
-         if feature_contacts == "grid_points":
-             for grid_point in self.active_gridpoints:
-                 self.run_CV(self.grid_point_ind_data[grid_point], self.label)
-                 self.set_CV_results(
-                     "gridpoint_ind_results", contact_point=grid_point
-                 )
-             return self.gridpoint_ind_results
-
-         if feature_contacts == "ind_channels":
-             for ch in self.used_chs:
-                 self.ch_name_tested = ch
-                 self.run_CV(self.ch_ind_data[ch], self.label)
-                 self.set_CV_results("ch_ind_results", contact_point=ch)
-             return self.ch_ind_results
-
-         if feature_contacts == "all_channels_combined":
-             dat_combined = np.array(self.data)
-             self.run_CV(dat_combined, self.label)
-             self.set_CV_results("all_ch_results", contact_point=None)
-             return self.all_ch_results
-
-     def set_data_grid_points(self, cortex_only=False, subcortex_only=False):
-         """Read the run_analysis
-         Projected data has the shape (samples, grid points, features)
-         """
-
-         # activate_gridpoints stores cortex + subcortex data
-         self.active_gridpoints = np.unique(
-             [
-                 i.split("_")[0] + "_" + i.split("_")[1]
-                 for i in self.features.columns
-                 if "grid" in i
-             ]
-         )
-
-         if cortex_only:
-             self.active_gridpoints = [
-                 i for i in self.active_gridpoints if i.startswith("gridcortex")
-             ]
-
-         if subcortex_only:
-             self.active_gridpoints = [
-                 i
-                 for i in self.active_gridpoints
-                 if i.startswith("gridsubcortex")
-             ]
-
-         self.feature_names = [
-             i[len(self.active_gridpoints[0] + "_") :]
-             for i in self.features.columns
-             if self.active_gridpoints[0] + "_" in i
-         ]
-
-         self.grid_point_ind_data = {}
-
-         self.grid_point_ind_data = {
-             grid_point: np.nan_to_num(
-                 self.features[
-                     [i for i in self.features.columns if grid_point + "_" in i]
-                 ]
-             )
-             for grid_point in self.active_gridpoints
-         }
-
-     def get_movement_grouped_array(
-         self, prediction, threshold=0.5, min_consequent_count=5
-     ):
-         """Return given a 1D numpy array, an array of same size with grouped consective blocks
-
-         Parameters
-         ----------
-         prediction : np.array
-             numpy array of either predictions or labels, that is going to be grouped
-         threshold : float, optional
-             threshold to be applied to 'prediction', by default 0.5
-         min_consequent_count : int, optional
-             minimum required consective samples higher than 'threshold', by default 5
-
-         Returns
-         -------
-         labeled_array : np.array
-             grouped vector with incrementing number for movement blocks
-         labels_count : int
-             count of individual movement blocks
-         """
-         mask = prediction > threshold
-         structure = [
-             True
-         ] * min_consequent_count  # used for erosion and dilation
-         eroded = binary_erosion(mask, structure)
-         dilated = binary_dilation(eroded, structure)
-         labeled_array, labels_count = label_ndimage(dilated)
-         return labeled_array, labels_count
-
-     def calc_movement_detection_rate(
-         self, y_label, prediction, threshold=0.5, min_consequent_count=3
-     ):
-         """Given a label and prediction, return the movement detection rate on the basis of
-         movements classified in blocks of 'min_consequent_count'.
-
-         Parameters
-         ----------
-         y_label : [type]
-             [description]
-         prediction : [type]
-             [description]
-         threshold : float, optional
-             threshold to be applied to 'prediction', by default 0.5
-         min_consequent_count : int, optional
-             minimum required consective samples higher than 'threshold', by default 3
-
-         Returns
-         -------
-         mov_detection_rate : float
-             movement detection rate, where at least 'min_consequent_count' samples where high in prediction
-         fpr : np.array
-             sklearn.metrics false positive rate np.array
-         tpr : np.array
-             sklearn.metrics true positive rate np.array
-         """
-
-         pred_grouped, _ = self.get_movement_grouped_array(
-             prediction, threshold, min_consequent_count
-         )
-         y_grouped, labels_count = self.get_movement_grouped_array(
-             y_label, threshold, min_consequent_count
-         )
-
-         hit_rate = np.zeros(labels_count)
-         pred_group_bin = np.array(pred_grouped > 0)
-
-         for label_number in range(
-             1, labels_count + 1
-         ):  # labeling starts from 1
-             hit_rate[label_number - 1] = np.sum(
-                 pred_group_bin[np.where(y_grouped == label_number)[0]]
-             )
-
-         try:
-             mov_detection_rate = (
-                 np.where(hit_rate > 0)[0].shape[0] / labels_count
-             )
-         except ZeroDivisionError:
-             print("no movements in label")
-             return 0, 0, 0
-
-         # calculating TPR and FPR: https://stackoverflow.com/a/40324184/5060208
-         CM = metrics.confusion_matrix(y_label, prediction)
-
-         TN = CM[0][0]
-         FN = CM[1][0]
-         TP = CM[1][1]
-         FP = CM[0][1]
-         fpr = FP / (FP + TN)
-         tpr = TP / (TP + FN)
-
-         return mov_detection_rate, fpr, tpr
-
-     def init_cv_res(self) -> None:
-         return CV_res(
-             get_movement_detection_rate=self.get_movement_detection_rate,
-             RUN_BAY_OPT=self.RUN_BAY_OPT,
-             mrmr_select=self.mrmr_select,
-             model_save=self.model_save,
-         )
-
-     # @staticmethod
-     # @jit(nopython=True)
-     def append_previous_n_samples(X: np.ndarray, y: np.ndarray, n: int = 5):
-         """
-         stack feature vector for n samples
-         """
-         TIME_DIM = X.shape[0] - n
-         FEATURE_DIM = int(n * X.shape[1])
-         time_arr = np.empty((TIME_DIM, FEATURE_DIM))
-         for time_idx, time_ in enumerate(np.arange(n, X.shape[0])):
-             for time_point in range(n):
-                 time_arr[
-                     time_idx,
-                     time_point * X.shape[1] : (time_point + 1) * X.shape[1],
-                 ] = X[time_ - time_point, :]
-         return time_arr, y[n:]
-
-     @staticmethod
-     def append_samples_val(X_train, y_train, X_val, y_val, n):
-
-         X_train, y_train = Decoder.append_previous_n_samples(
-             X_train, y_train, n=n
-         )
-         X_val, y_val = Decoder.append_previous_n_samples(X_val, y_val, n=n)
-         return X_train, y_train, X_val, y_val
-
-     def fit_model(self, model, X_train, y_train):
-
-         if self.TRAIN_VAL_SPLIT is True:
-             X_train, X_val, y_train, y_val = model_selection.train_test_split(
-                 X_train, y_train, train_size=0.7, shuffle=False
-             )
-
-             if y_train.sum() == 0 or y_val.sum(0) == 0:
-                 raise Decoder.ClassMissingException
-
-             # if type(model) is xgboost.sklearn.XGBClassifier:
-             #     classes_weights = class_weight.compute_sample_weight(
-             #         class_weight="balanced", y=y_train
-             #     )
-             #     model.set_params(eval_metric="logloss")
-             #     model.fit(
-             #         X_train,
-             #         y_train,
-             #         eval_set=[(X_val, y_val)],
-             #         early_stopping_rounds=7,
-             #         sample_weight=classes_weights,
-             #         verbose=self.VERBOSE,
-             #     )
-             # elif type(model) is xgboost.sklearn.XGBRegressor:
-             #     # might be necessary to adapt for other classifiers
-             #
-             #     def evalerror(preds, dtrain):
-             #         labels = dtrain.get_label()
-             #         # return a pair metric_name, result. The metric name must not contain a
-             #         # colon (:) or a space since preds are margin(before logistic
-             #         # transformation, cutoff at 0)
-             #
-             #         r2 = metrics.r2_score(labels, preds)
-             #
-             #         if r2 < 0:
-             #             r2 = 0
-             #
-             #         return "r2", -r2
-             #
-             #     model.set_params(eval_metric=evalerror)
-             #     model.fit(
-             #         X_train,
-             #         y_train,
-             #         eval_set=[(X_val, y_val)],
-             #         early_stopping_rounds=10,
-             #         verbose=self.VERBOSE,
-             #     )
-             # else:
-             #     model.fit(X_train, y_train, eval_set=[(X_val, y_val)])
-         else:
-
-             # check for LDA; and apply rebalancing
-             if self.oversampling:
-                 X_train, y_train = self.ros.fit_resample(X_train, y_train)
-             if self.undersampling:
-                 X_train, y_train = self.rus.fit_resample(X_train, y_train)
-
-             #if type(model) is xgboost.sklearn.XGBClassifier:
-             #    model.set_params(eval_metric="logloss")
-             #    model.fit(X_train, y_train)
-             #else:
-             model.fit(X_train, y_train)
-
-         return model
-
-     def eval_model(
-         self,
-         model_train,
-         X_train,
-         X_test,
-         y_train,
-         y_test,
-         cv_res: Type[CV_res],
-         save_data=True,
-         save_probabilities=False,
-     ) -> Type[CV_res]:
-
-         if self.save_coef:
-             cv_res.coef.append(model_train.coef_)
-
-         y_test_pr = model_train.predict(X_test)
-         y_train_pr = model_train.predict(X_train)
-
-         sc_te = self.eval_method(y_test, y_test_pr)
-         sc_tr = self.eval_method(y_train, y_train_pr)
-
-         if self.threshold_score is True:
-             if sc_tr < 0:
-                 sc_tr = 0
-             if sc_te < 0:
-                 sc_te = 0
-
-         if self.get_movement_detection_rate is True:
-             self._set_movement_detection_rates(
-                 y_test, y_test_pr, y_train, y_train_pr, cv_res
-             )
-
-         cv_res.score_train.append(sc_tr)
-         cv_res.score_test.append(sc_te)
-         if save_data is True:
-             cv_res.X_train.append(X_train)
-             cv_res.X_test.append(X_test)
-         if self.model_save is True:
-             cv_res.model_save.append(
-                 deepcopy(model_train)
-             )  # clone won't copy params
-         cv_res.y_train.append(y_train)
-         cv_res.y_test.append(y_test)
-
-         if save_probabilities is False:
-             cv_res.y_train_pr.append(y_train_pr)
-             cv_res.y_test_pr.append(y_test_pr)
-         else:
-             cv_res.y_train_pr.append(model_train.predict_proba(X_train))
-             cv_res.y_test_pr.append(model_train.predict_proba(X_test))
-         return cv_res
-
-     def _set_movement_detection_rates(
-         self,
-         y_test: np.ndarray,
-         y_test_pr: np.ndarray,
-         y_train: np.ndarray,
-         y_train_pr: np.ndarray,
-         cv_res: Type[CV_res],
-     ) -> Type[CV_res]:
-
-         mov_detection_rate, fpr, tpr = self.calc_movement_detection_rate(
-             y_test,
-             y_test_pr,
-             self.mov_detection_threshold,
-             self.min_consequent_count,
-         )
-
-         cv_res.mov_detection_rates_test.append(mov_detection_rate)
-         cv_res.tprate_test.append(tpr)
-         cv_res.fprate_test.append(fpr)
-
-         mov_detection_rate, fpr, tpr = self.calc_movement_detection_rate(
-             y_train,
-             y_train_pr,
-             self.mov_detection_threshold,
-             self.min_consequent_count,
-         )
-
-         cv_res.mov_detection_rates_train.append(mov_detection_rate)
-         cv_res.tprate_train.append(tpr)
-         cv_res.fprate_train.append(fpr)
-
-         return cv_res
-
-     def wrapper_model_train(
-         self,
-         X_train,
-         y_train,
-         X_test=None,
-         y_test=None,
-         cv_res: Type[CV_res] = None,
-         return_fitted_model_only: bool = False,
-         save_data=True,
-     ):
-
-         if cv_res is None:
-             cv_res = CV_res(
-                 get_movement_detection_rate=self.get_movement_detection_rate,
-                 RUN_BAY_OPT=self.RUN_BAY_OPT,
-                 mrmr_select=self.mrmr_select,
-                 model_save=self.model_save,
-             )
-
-         model_train = clone(self.model)
-         if self.STACK_FEATURES_N_SAMPLES is True:
-             if X_test is not None:
-                 X_train, y_train, X_test, y_test = Decoder.append_samples_val(
-                     X_train,
-                     y_train,
-                     X_test,
-                     y_test,
-                     n=self.time_stack_n_samples,
-                 )
-             else:
-                 X_train, y_train = Decoder.append_previous_n_samples(
-                     X_train, y_train, n=self.time_stack_n_samples
-                 )
-
-         if y_train.sum() == 0 or (
-             y_test is not None and y_test.sum() == 0
-         ):  # only one class present
-             raise Decoder.ClassMissingException
-
-         if self.RUN_BAY_OPT is True:
-             model_train = self.bay_opt_wrapper(model_train, X_train, y_train)
-
-         if self.mrmr_select is True:
-             if len(self.feature_names) > X_train.shape[1]:
-                 # analyze induvidual ch
-                 columns_names = [
-                     col
-                     for col in self.feature_names
-                     if col.startswith(self.ch_name_tested)
-                 ]
-                 if self.columns_names_single_ch is None:
-                     self.columns_names_single_ch = [
-                         f[len(self.ch_name_tested) + 1 :]
-                         for f in columns_names
-                     ]
-             else:
-                 # analyze all_ch_combined
-                 columns_names = self.feature_names
-             X_train = pd.DataFrame(X_train, columns=columns_names)
-             X_test = pd.DataFrame(X_test, columns=columns_names)
-
-             y_train = pd.Series(y_train)
-             selected_features = mrmr_classif(
-                 X=X_train, y=y_train, K=20, n_jobs=60
-             )
-
-             X_train = X_train[selected_features]
-             X_test = X_test[selected_features]
-
-         if self.pca is True:
-             pca = decomposition.PCA(n_components=10)
-             pca.fit(X_train)
-             X_train = pca.transform(X_train)
-             X_test = pca.transform(X_test)
-
-         if self.cca is True:
-             cca = cross_decomposition.CCA(n_components=10)
-             cca.fit(X_train, y_train)
-             X_train = cca.transform(X_train)
-             X_test = cca.transform(X_test)
-
-         if self.STACK_FEATURES_N_SAMPLES is True:
-
-             if return_fitted_model_only is True:
-                 X_train, y_train = self.append_previous_n_samples(
-                     X_train, y_train, self.time_stack_n_samples
-                 )
-             else:
-                 X_train, y_train, X_test, y_test = self.append_samples_val(
-                     X_train, y_train, X_test, y_test, self.time_stack_n_samples
-                 )
-
-         # fit model
-         model_train = self.fit_model(model_train, X_train, y_train)
-
-         if return_fitted_model_only is True:
-             return model_train
-
-         cv_res = self.eval_model(
-             model_train, X_train, X_test, y_train, y_test, cv_res, save_data
-         )
-
-         if self.mrmr_select is True:
-             cv_res.mrmr_select.append(selected_features)
-
-         return cv_res
-
-     def run_CV(self, data, label):
-         """Evaluate model performance on the specified cross validation.
-         If no data and label is specified, use whole feature class attributes.
-
-         Parameters
-         ----------
-         data (np.ndarray):
-             data to train and test with shape samples, features
-         label (np.ndarray):
-             label to train and test with shape samples, features
-         """
-
-         def split_data(data):
-             if self.cv_method == "NonShuffledTrainTestSplit":
-
-                 # set outer 10s set to train index
-                 # test index is thus in the middle starting at random number
-                 N_samples = data.shape[0]
-                 test_area_points = (N_samples - self.sfreq * 10) - (
-                     self.sfreq * 10
-                 )
-                 test_points = int(N_samples * 0.3)
-
-                 if test_area_points > test_points:
-                     start_index = np.random.randint(
-                         int(self.sfreq * 10),
-                         N_samples - self.sfreq * 10 - test_points,
-                     )
-                     test_index = np.arange(
-                         start_index, start_index + test_points
-                     )
-                     train_index = np.concatenate(
-                         (
-                             np.arange(0, start_index),
-                             np.arange(start_index + test_points, N_samples),
-                         ),
-                         axis=0,
-                     ).flatten()
-                     yield train_index, test_index
-                 else:
-                     cv_single_tr_te_split = model_selection.check_cv(
-                         cv=[
-                             model_selection.train_test_split(
-                                 np.arange(data.shape[0]),
-                                 test_size=0.3,
-                                 shuffle=False,
-                             )
-                         ]
-                     )
-                     for (
-                         train_index,
-                         test_index,
-                     ) in cv_single_tr_te_split.split():
-                         yield train_index, test_index
-             else:
-                 for train_index, test_index in self.cv_method.split(data):
-                     yield train_index, test_index
-
-         cv_res = self.init_cv_res()
-
-         if self.use_nested_cv is True:
-             cv_res_inner = self.init_cv_res()
-
-         for train_index, test_index in split_data(data):
-             X_train, y_train = data[train_index, :], label[train_index]
-             X_test, y_test = data[test_index], label[test_index]
-             try:
-                 cv_res = self.wrapper_model_train(
-                     X_train, y_train, X_test, y_test, cv_res
-                 )
-             except Decoder.ClassMissingException:
-                 continue
-
-             if self.use_nested_cv is True:
-                 data_inner = data[train_index]
-                 label_inner = label[train_index]
-                 for train_index_inner, test_index_inner in split_data(
-                     data_inner
-                 ):
-                     X_train_inner = data_inner[train_index_inner, :]
-                     y_train_inner = label_inner[train_index_inner]
-                     X_test_inner = data_inner[test_index_inner]
-                     y_test_inner = label_inner[test_index_inner]
-                     try:
-                         cv_res_inner = self.wrapper_model_train(
-                             X_train_inner,
-                             y_train_inner,
-                             X_test_inner,
-                             y_test_inner,
-                             cv_res_inner,
-                         )
-                     except Decoder.ClassMissingException:
-                         continue
-
-         self.cv_res = cv_res
-         if self.use_nested_cv is True:
-             self.cv_res_inner = cv_res_inner
-
-     def bay_opt_wrapper(self, model_train, X_train, y_train):
-         """Run bayesian optimization and test best params to model_train
-         Save best params into self.best_bay_opt_params
-         """
-
-         (
-             X_train_bo,
-             X_test_bo,
-             y_train_bo,
-             y_test_bo,
-         ) = model_selection.train_test_split(
-             X_train, y_train, train_size=0.7, shuffle=False
-         )
-
-         if y_train_bo.sum() == 0 or y_test_bo.sum() == 0:
-             print("could not start Bay. Opt. with no labels > 0")
-             raise Decoder.ClassMissingException
-
-         params_bo = self.run_Bay_Opt(
-             X_train_bo, y_train_bo, X_test_bo, y_test_bo, rounds=10
-         )
-
-         # set bay. opt. obtained best params to model
-         params_bo_dict = {}
-         for i in range(len(params_bo)):
-             setattr(
-                 model_train, self.bay_opt_param_space[i].name, params_bo[i]
-             )
-             params_bo_dict[self.bay_opt_param_space[i].name] = params_bo[i]
-
-         self.best_bay_opt_params.append(params_bo_dict)
-
-         return model_train
-
-     def run_Bay_Opt(
-         self,
-         X_train,
-         y_train,
-         X_test,
-         y_test,
-         rounds=30,
-         base_estimator="GP",
-         acq_func="EI",
-         acq_optimizer="sampling",
-         initial_point_generator="lhs",
-     ):
-         """Run skopt bayesian optimization
-         skopt.Optimizer:
-         https://scikit-optimize.github.io/stable/modules/generated/skopt.Optimizer.html#skopt.Optimizer
-
-         example:
-         https://scikit-optimize.github.io/stable/auto_examples/ask-and-tell.html#sphx-glr-auto-examples-ask-and-tell-py
-
-         Special attention needs to be made with the run_CV output,
-         some metrics are minimized (MAE), some are maximized (r^2)
-
-         Parameters
-         ----------
-         X_train: np.ndarray
-         y_train: np.ndarray
-         X_test: np.ndarray
-         y_test: np.ndarray
-         rounds : int, optional
-             optimizing rounds, by default 10
-         base_estimator : str, optional
-             surrogate model, used as optimization function instead of cross validation, by default "GP"
-         acq_func : str, optional
-             function to minimize over the posterior distribution, by default "EI"
-         acq_optimizer : str, optional
-             method to minimize the acquisition function, by default "sampling"
-         initial_point_generator : str, optional
-             sets a initial point generator, by default "lhs"
-
-         Returns
-         -------
-         skopt result parameters
-         """
-
-         def get_f_val(model_bo):
-
-             try:
-                 model_bo = self.fit_model(model_bo, X_train, y_train)
-             except Decoder.ClassMissingException:
-                 pass
-
-             return self.eval_method(y_test, model_bo.predict(X_test))
-
-         opt = Optimizer(
-             self.bay_opt_param_space,
-             base_estimator=base_estimator,
-             acq_func=acq_func,
-             acq_optimizer=acq_optimizer,
-             initial_point_generator=initial_point_generator,
-         )
-
-         for _ in range(rounds):
-             next_x = opt.ask()
-             # set model values
-             model_bo = clone(self.model)
-             for i in range(len(next_x)):
-                 setattr(model_bo, self.bay_opt_param_space[i].name, next_x[i])
-             f_val = get_f_val(model_bo)
-             res = opt.tell(next_x, f_val)
-             if self.VERBOSE:
-                 print(f_val)
-
-         # res is here automatically appended by skopt
-         return res.x
-
-     def save(
-         self, feature_path: str, feature_file: str, str_save_add=None
-     ) -> None:
-         """Save decoder object to pickle"""
-
-         # why is the decoder not saved to a .json?
-
-         if str_save_add is None:
-             PATH_OUT = os.path.join(
-                 feature_path, feature_file, feature_file + "_ML_RES.p"
-             )
-         else:
-             PATH_OUT = os.path.join(
-                 feature_path,
-                 feature_file,
-                 feature_file + "_" + str_save_add + "_ML_RES.p",
-             )
-
-         print("model being saved to: " + str(PATH_OUT))
-         with open(PATH_OUT, "wb") as output:  # Overwrites any existing file.
-             cPickle.dump(self, output)
1
+ from sklearn import model_selection
2
+ from sklearn.linear_model import LinearRegression
3
+ from sklearn.base import clone
4
+ from sklearn.metrics import r2_score
5
+
6
+ import pandas as pd
7
+ import numpy as np
8
+ from copy import deepcopy
9
+ from pathlib import PurePath
10
+ import pickle
11
+
12
+ from py_neuromodulation import logger
13
+
14
+ from typing import Callable
15
+
16
+
17
+ class CV_res:
18
+ def __init__(
19
+ self,
20
+ get_movement_detection_rate: bool = False,
21
+ RUN_BAY_OPT: bool = False,
22
+ mrmr_select: bool = False,
23
+ model_save: bool = False,
24
+ ) -> None:
25
+ self.score_train: list = []
26
+ self.score_test: list = []
27
+ self.y_test: list = []
28
+ self.y_train: list = []
29
+ self.y_test_pr: list = []
30
+ self.y_train_pr: list = []
31
+ self.X_test: list = []
32
+ self.X_train: list = []
33
+ self.coef: list = []
34
+
35
+ if get_movement_detection_rate:
36
+ self.mov_detection_rates_test: list = []
37
+ self.tprate_test: list = []
38
+ self.fprate_test: list = []
39
+ self.mov_detection_rates_train: list = []
40
+ self.tprate_train: list = []
41
+ self.fprate_train: list = []
42
+ if RUN_BAY_OPT:
43
+ self.best_bay_opt_params: list = []
44
+ if mrmr_select:
45
+ self.mrmr_select: list = []
46
+ if model_save:
47
+ self.model_save: list = []
48
+
49
+
50
+ class Decoder:
51
+ class ClassMissingException(Exception):
52
+ def __init__(
53
+ self,
54
+ message="Only one class present.",
55
+ ) -> None:
56
+ self.message = message
57
+ super().__init__(self.message)
58
+
59
+ def __str__(self):
60
+ return self.message
61
+
62
+ def __init__(
63
+ self,
64
+ features: "pd.DataFrame| None " = None,
65
+ label: np.ndarray | None = None,
66
+ label_name: str | None = None,
67
+ used_chs: list[str] = [],
68
+ model=LinearRegression(),
69
+ eval_method: Callable = r2_score,
70
+ cv_method=model_selection.KFold(n_splits=3, shuffle=False),
71
+ use_nested_cv: bool = False,
72
+ threshold_score=True,
73
+ mov_detection_threshold: float = 0.5,
74
+ TRAIN_VAL_SPLIT: bool = False,
75
+ RUN_BAY_OPT: bool = False,
76
+ STACK_FEATURES_N_SAMPLES: bool = False,
77
+ time_stack_n_samples: int = 5,
78
+ save_coef: bool = False,
79
+ get_movement_detection_rate: bool = False,
80
+ min_consequent_count: int = 3,
81
+ bay_opt_param_space: list = [],
82
+ VERBOSE: bool = False,
83
+ sfreq: int | None = None,
84
+ undersampling: bool = False,
85
+ oversampling: bool = False,
86
+ mrmr_select: bool = False,
87
+ pca: bool = False,
88
+ cca: bool = False,
89
+ model_save: bool = False,
90
+ ) -> None:
91
+ """Initialize here a feature file for processing
92
+ Read settings.json channels.csv and features.csv
93
+ Read target label
94
+
95
+ Parameters
96
+ ----------
97
+ model : machine learning model
98
+ model that utilizes fit and predict functions
99
+ eval_method : sklearn metrics
100
+ evaluation scoring method, will default to r2_score if not passed
101
+ cv_method : sklearm model_selection method
102
+ threshold_score : boolean
103
+ if True set lower threshold at zero (useful for r2),
104
+ mov_detection_threshold : float
105
+ if get_movement_detection_rate is True, find given minimum 'threshold' respective
106
+ consecutive movement blocks, by default 0.5
107
+ TRAIN_VAL_SPLIT (boolean):
108
+ if true split data into additinal validation, and run class weighted CV
109
+ save_coef (boolean):
110
+ if true, save model._coef trained coefficients
111
+ get_movement_detection_rate (boolean):
112
+ save detection rate and tpr / fpr as well
113
+ min_consequent_count (int):
114
+ if get_movement_detection_rate is True, find given 'min_consequent_count' respective
115
+ consecutive movement blocks with minimum size of 'min_consequent_count'
116
+ """
117
+
118
+ self.model = model
119
+ self.eval_method = eval_method
120
+ self.cv_method = cv_method
121
+ self.use_nested_cv = use_nested_cv
122
+ self.threshold_score = threshold_score
123
+ self.mov_detection_threshold = mov_detection_threshold
124
+ self.TRAIN_VAL_SPLIT = TRAIN_VAL_SPLIT
125
+ self.RUN_BAY_OPT = RUN_BAY_OPT
126
+ self.save_coef = save_coef
127
+ self.sfreq = sfreq
128
+ self.get_movement_detection_rate = get_movement_detection_rate
129
+ self.min_consequent_count = min_consequent_count
130
+ self.STACK_FEATURES_N_SAMPLES = STACK_FEATURES_N_SAMPLES
131
+ self.time_stack_n_samples = time_stack_n_samples
132
+ self.bay_opt_param_space = bay_opt_param_space
133
+ self.VERBOSE = VERBOSE
134
+ self.undersampling = undersampling
135
+ self.oversampling = oversampling
136
+ self.mrmr_select = mrmr_select
137
+ self.used_chs = used_chs
138
+ self.label = label
139
+ self.label_name = label_name
140
+ self.cca = cca
141
+ self.pca = pca
142
+ self.model_save = model_save
143
+
144
+ self.set_data(features)
145
+
146
+ self.ch_ind_data = {}
147
+ self.grid_point_ind_data = {}
148
+ self.active_gridpoints = []
149
+ self.feature_names = []
150
+ self.ch_ind_results = {}
151
+ self.gridpoint_ind_results = {}
152
+ self.all_ch_results = {}
153
+ self.columns_names_single_ch = None
154
+
155
+ if undersampling:
156
+ from imblearn.under_sampling import RandomUnderSampler
157
+
158
+ self.rus = RandomUnderSampler(random_state=0)
159
+
160
+ if oversampling:
161
+ from imblearn.over_sampling import RandomOverSampler
162
+
163
+ self.ros = RandomOverSampler(random_state=0)
164
+
165
+ def set_data(self, features):
166
+ if features is not None:
167
+ self.features = features
168
+ self.feature_names = [
169
+ col
170
+ for col in self.features.columns
171
+ if not (("time" in col) or (self.label_name in col))
172
+ ]
173
+ self.data = np.nan_to_num(np.array(self.features[self.feature_names]))
174
+
175
+ def set_data_ind_channels(self):
176
+ """specified channel individual data"""
177
+ self.ch_ind_data = {}
178
+ for ch in self.used_chs:
179
+ self.ch_ind_data[ch] = np.nan_to_num(
180
+ np.array(
181
+ self.features[
182
+ [col for col in self.features.columns if col.startswith(ch)]
183
+ ]
184
+ )
185
+ )
186
+
187
+ def set_CV_results(self, attr_name, contact_point=None):
188
+ """set CV results in respectie nm_decode attributes
189
+ The reference is first stored in obj_set, and the used lateron
190
+
191
+ Parameters
192
+ ----------
193
+ attr_name : string
194
+ is either all_ch_results, ch_ind_results, gridpoint_ind_results
195
+ contact_point : object, optional
196
+ usually an int specifying the grid_point or string, specifying the used channel,
197
+ by default None
198
+ """
199
+ if contact_point is not None:
200
+ getattr(self, attr_name)[contact_point] = {}
201
+ obj_set = getattr(self, attr_name)[contact_point]
202
+ else:
203
+ obj_set = getattr(self, attr_name)
204
+
205
+ def set_scores(cv_res: CV_res, set_inner_CV_res: bool = False):
206
+ """
207
+ This function renames the CV_res keys for InnerCV
208
+ """
209
+
210
+ def set_score(key_: str, val):
211
+ if set_inner_CV_res:
212
+ key_ = "InnerCV_" + key_
213
+ obj_set[key_] = val
214
+
215
+ set_score("score_train", cv_res.score_train)
216
+ set_score("score_test", cv_res.score_test)
217
+ set_score("y_test", cv_res.y_test)
218
+ set_score("y_train", cv_res.y_train)
219
+ set_score("y_test_pr", cv_res.y_test_pr)
220
+ set_score("y_train_pr", cv_res.y_train_pr)
221
+ set_score("X_train", cv_res.X_train)
222
+ set_score("X_test", cv_res.X_test)
223
+
224
+ if self.save_coef:
225
+ set_score("coef", cv_res.coef)
226
+ if self.get_movement_detection_rate:
227
+ set_score("mov_detection_rates_test", cv_res.mov_detection_rates_test)
228
+ set_score(
229
+ "mov_detection_rates_train",
230
+ cv_res.mov_detection_rates_train,
231
+ )
232
+ set_score("fprate_test", cv_res.fprate_test)
233
+ set_score("fprate_train", cv_res.fprate_train)
234
+ set_score("tprate_test", cv_res.tprate_test)
235
+ set_score("tprate_train", cv_res.tprate_train)
236
+
237
+ if self.RUN_BAY_OPT:
238
+ set_score("best_bay_opt_params", cv_res.best_bay_opt_params)
239
+
240
+ if self.mrmr_select:
241
+ set_score("mrmr_select", cv_res.mrmr_select)
242
+ if self.model_save:
243
+ set_score("model_save", cv_res.model_save)
244
+ return obj_set
245
+
246
+ obj_set = set_scores(self.cv_res)
247
+
248
+ if self.use_nested_cv:
249
+ obj_set = set_scores(self.cv_res_inner, set_inner_CV_res=True)
250
+
251
+ def run_CV_caller(self, feature_contacts: str = "ind_channels"):
252
+ """Wrapper that call for all channels / grid points / combined channels the CV function
253
+
254
+ Parameters
255
+ ----------
256
+ feature_contacts : str, optional
257
+ "grid_points", "ind_channels" or "all_channels_combined" , by default "ind_channels"
258
+ """
259
+ valid_feature_contacts = [
260
+ "ind_channels",
261
+ "all_channels_combined",
262
+ "grid_points",
263
+ ]
264
+ if feature_contacts not in valid_feature_contacts:
265
+ raise ValueError(f"{feature_contacts} not in {valid_feature_contacts}")
266
+
267
+ if feature_contacts == "grid_points":
268
+ for grid_point in self.active_gridpoints:
269
+ self.run_CV(self.grid_point_ind_data[grid_point], self.label)
270
+ self.set_CV_results("gridpoint_ind_results", contact_point=grid_point)
271
+ return self.gridpoint_ind_results
272
+
273
+ if feature_contacts == "ind_channels":
274
+ for ch in self.used_chs:
275
+ self.ch_name_tested = ch
276
+ self.run_CV(self.ch_ind_data[ch], self.label)
277
+ self.set_CV_results("ch_ind_results", contact_point=ch)
278
+ return self.ch_ind_results
279
+
280
+ if feature_contacts == "all_channels_combined":
281
+ dat_combined = np.array(self.data)
282
+ self.run_CV(dat_combined, self.label)
283
+ self.set_CV_results("all_ch_results", contact_point=None)
284
+ return self.all_ch_results
285
+
286
+ def set_data_grid_points(self, cortex_only=False, subcortex_only=False):
287
+ """Read the run_analysis
288
+ Projected data has the shape (samples, grid points, features)
289
+ """
290
+
291
+ # activate_gridpoints stores cortex + subcortex data
292
+ self.active_gridpoints = np.unique(
293
+ [
294
+ i.split("_")[0] + "_" + i.split("_")[1]
295
+ for i in self.features.columns
296
+ if "grid" in i
297
+ ]
298
+ )
299
+
300
+ if cortex_only:
301
+ self.active_gridpoints = [
302
+ i for i in self.active_gridpoints if i.startswith("gridcortex")
303
+ ]
304
+
305
+ if subcortex_only:
306
+ self.active_gridpoints = [
307
+ i for i in self.active_gridpoints if i.startswith("gridsubcortex")
308
+ ]
309
+
310
+ self.feature_names = [
311
+ i[len(self.active_gridpoints[0] + "_") :]
312
+ for i in self.features.columns
313
+ if self.active_gridpoints[0] + "_" in i
314
+ ]
315
+
316
+ self.grid_point_ind_data = {}
317
+
318
+ self.grid_point_ind_data = {
319
+ grid_point: np.nan_to_num(
320
+ self.features[
321
+ [i for i in self.features.columns if grid_point + "_" in i]
322
+ ]
323
+ )
324
+ for grid_point in self.active_gridpoints
325
+ }
326
+
327
+ def get_movement_grouped_array(
328
+ self, prediction, threshold=0.5, min_consequent_count=5
329
+ ):
330
+ """Return given a 1D numpy array, an array of same size with grouped consective blocks
331
+
332
+ Parameters
333
+ ----------
334
+ prediction : np.ndarray
335
+ numpy array of either predictions or labels, that is going to be grouped
336
+ threshold : float, optional
337
+ threshold to be applied to 'prediction', by default 0.5
338
+ min_consequent_count : int, optional
339
+ minimum required consective samples higher than 'threshold', by default 5
340
+
341
+ Returns
342
+ -------
343
+ labeled_array : np.ndarray
344
+ grouped vector with incrementing number for movement blocks
345
+ labels_count : int
346
+ count of individual movement blocks
347
+ """
348
+
349
+ from scipy.ndimage import label as label_ndimage
350
+ from scipy.ndimage import binary_dilation, binary_erosion
351
+
352
+ mask = prediction > threshold
353
+ structure = [True] * min_consequent_count # used for erosion and dilation
354
+ eroded = binary_erosion(mask, structure)
355
+ dilated = binary_dilation(eroded, structure)
356
+ labeled_array, labels_count = label_ndimage(dilated)
357
+ return labeled_array, labels_count
358
+
359
+ def calc_movement_detection_rate(
360
+ self, y_label, prediction, threshold=0.5, min_consequent_count=3
361
+ ):
362
+ """Given a label and prediction, return the movement detection rate on the basis of
363
+ movements classified in blocks of 'min_consequent_count'.
364
+
365
+ Parameters
366
+ ----------
367
+ y_label : [type]
368
+ [description]
369
+ prediction : [type]
370
+ [description]
371
+ threshold : float, optional
372
+ threshold to be applied to 'prediction', by default 0.5
373
+ min_consequent_count : int, optional
374
+ minimum required consective samples higher than 'threshold', by default 3
375
+
376
+ Returns
377
+ -------
378
+ mov_detection_rate : float
379
+ movement detection rate, where at least 'min_consequent_count' samples where high in prediction
380
+ fpr : np.ndarray
381
+ sklearn.metrics false positive rate np.ndarray
382
+ tpr : np.ndarray
383
+ sklearn.metrics true positive rate np.ndarray
384
+ """
385
+ from sklearn.metrics import confusion_matrix
386
+
387
+ pred_grouped, _ = self.get_movement_grouped_array(
388
+ prediction, threshold, min_consequent_count
389
+ )
390
+ y_grouped, labels_count = self.get_movement_grouped_array(
391
+ y_label, threshold, min_consequent_count
392
+ )
393
+
394
+ hit_rate = np.zeros(labels_count)
395
+ pred_group_bin = np.array(pred_grouped > 0)
396
+
397
+ for label_number in range(1, labels_count + 1): # labeling starts from 1
398
+ hit_rate[label_number - 1] = np.sum(
399
+ pred_group_bin[np.where(y_grouped == label_number)[0]]
400
+ )
401
+
402
+ try:
403
+ mov_detection_rate = np.where(hit_rate > 0)[0].shape[0] / labels_count
404
+ except ZeroDivisionError:
405
+ logger.warning("no movements in label")
406
+ return 0, 0, 0
407
+
408
+ # calculating TPR and FPR: https://stackoverflow.com/a/40324184/5060208
409
+ CM = confusion_matrix(y_label, prediction)
410
+
411
+ TN = CM[0][0]
412
+ FN = CM[1][0]
413
+ TP = CM[1][1]
414
+ FP = CM[0][1]
415
+ fpr = FP / (FP + TN)
416
+ tpr = TP / (TP + FN)
417
+
418
+ return mov_detection_rate, fpr, tpr
419
+
420
+ def init_cv_res(self) -> None:
421
+ return CV_res(
422
+ get_movement_detection_rate=self.get_movement_detection_rate,
423
+ RUN_BAY_OPT=self.RUN_BAY_OPT,
424
+ mrmr_select=self.mrmr_select,
425
+ model_save=self.model_save,
426
+ )
427
+
428
+ # @staticmethod
429
+ # @jit(nopython=True)
430
+ def append_previous_n_samples(X: np.ndarray, y: np.ndarray, n: int = 5):
431
+ """
432
+ stack feature vector for n samples
433
+ """
434
+ TIME_DIM = X.shape[0] - n
435
+ FEATURE_DIM = int(n * X.shape[1])
436
+ time_arr = np.empty((TIME_DIM, FEATURE_DIM))
437
+ for time_idx, time_ in enumerate(np.arange(n, X.shape[0])):
438
+ for time_point in range(n):
439
+ time_arr[
440
+ time_idx,
441
+ time_point * X.shape[1] : (time_point + 1) * X.shape[1],
442
+ ] = X[time_ - time_point, :]
443
+ return time_arr, y[n:]
444
+
445
+ @staticmethod
446
+ def append_samples_val(X_train, y_train, X_val, y_val, n):
447
+ X_train, y_train = Decoder.append_previous_n_samples(X_train, y_train, n=n)
448
+ X_val, y_val = Decoder.append_previous_n_samples(X_val, y_val, n=n)
449
+ return X_train, y_train, X_val, y_val
450
+
451
+ def fit_model(self, model, X_train, y_train):
452
+ if self.TRAIN_VAL_SPLIT:
453
+ X_train, X_val, y_train, y_val = model_selection.train_test_split(
454
+ X_train, y_train, train_size=0.7, shuffle=False
455
+ )
456
+
457
+ if y_train.sum() == 0 or y_val.sum(0) == 0:
458
+ raise Decoder.ClassMissingException
459
+
460
+ # if type(model) is xgboost.sklearn.XGBClassifier:
461
+ # classes_weights = class_weight.compute_sample_weight(
462
+ # class_weight="balanced", y=y_train
463
+ # )
464
+ # model.set_params(eval_metric="logloss")
465
+ # model.fit(
466
+ # X_train,
467
+ # y_train,
468
+ # eval_set=[(X_val, y_val)],
469
+ # early_stopping_rounds=7,
470
+ # sample_weight=classes_weights,
471
+ # verbose=self.VERBOSE,
472
+ # )
473
+ # elif type(model) is xgboost.sklearn.XGBRegressor:
474
+ # # might be necessary to adapt for other classifiers
475
+ #
476
+ # def evalerror(preds, dtrain):
477
+ # labels = dtrain.get_label()
478
+ # # return a pair metric_name, result. The metric name must not contain a
479
+ # # colon (:) or a space since preds are margin(before logistic
480
+ # # transformation, cutoff at 0)
481
+ #
482
+ # r2 = metrics.r2_score(labels, preds)
483
+ #
484
+ # if r2 < 0:
485
+ # r2 = 0
486
+ #
487
+ # return "r2", -r2
488
+ #
489
+ # model.set_params(eval_metric=evalerror)
490
+ # model.fit(
491
+ # X_train,
492
+ # y_train,
493
+ # eval_set=[(X_val, y_val)],
494
+ # early_stopping_rounds=10,
495
+ # verbose=self.VERBOSE,
496
+ # )
497
+ # else:
498
+ # model.fit(X_train, y_train, eval_set=[(X_val, y_val)])
499
+ else:
500
+ # check for LDA; and apply rebalancing
501
+ if self.oversampling:
502
+ X_train, y_train = self.ros.fit_resample(X_train, y_train)
503
+ if self.undersampling:
504
+ X_train, y_train = self.rus.fit_resample(X_train, y_train)
505
+
506
+ # if type(model) is xgboost.sklearn.XGBClassifier:
507
+ # model.set_params(eval_metric="logloss")
508
+ # model.fit(X_train, y_train)
509
+ # else:
510
+ model.fit(X_train, y_train)
511
+
512
+ return model
513
+
514
+ def eval_model(
515
+ self,
516
+ model_train,
517
+ X_train,
518
+ X_test,
519
+ y_train,
520
+ y_test,
521
+ cv_res: CV_res,
522
+ save_data=True,
523
+ save_probabilities=False,
524
+ ) -> CV_res:
525
+ if self.save_coef:
526
+ cv_res.coef.append(model_train.coef_)
527
+
528
+ y_test_pr = model_train.predict(X_test)
529
+ y_train_pr = model_train.predict(X_train)
530
+
531
+ sc_te = self.eval_method(y_test, y_test_pr)
532
+ sc_tr = self.eval_method(y_train, y_train_pr)
533
+
534
+ if self.threshold_score:
535
+ if sc_tr < 0:
536
+ sc_tr = 0
537
+ if sc_te < 0:
538
+ sc_te = 0
539
+
540
+ if self.get_movement_detection_rate:
541
+ self._set_movement_detection_rates(
542
+ y_test, y_test_pr, y_train, y_train_pr, cv_res
543
+ )
544
+
545
+ cv_res.score_train.append(sc_tr)
546
+ cv_res.score_test.append(sc_te)
547
+ if save_data:
548
+ cv_res.X_train.append(X_train)
549
+ cv_res.X_test.append(X_test)
550
+ if self.model_save:
551
+ cv_res.model_save.append(deepcopy(model_train)) # clone won't copy params
552
+ cv_res.y_train.append(y_train)
553
+ cv_res.y_test.append(y_test)
554
+
555
+ if not save_probabilities:
556
+ cv_res.y_train_pr.append(y_train_pr)
557
+ cv_res.y_test_pr.append(y_test_pr)
558
+ else:
559
+ cv_res.y_train_pr.append(model_train.predict_proba(X_train))
560
+ cv_res.y_test_pr.append(model_train.predict_proba(X_test))
561
+ return cv_res
562
+
+    def _set_movement_detection_rates(
+        self,
+        y_test: np.ndarray,
+        y_test_pr: np.ndarray,
+        y_train: np.ndarray,
+        y_train_pr: np.ndarray,
+        cv_res: CV_res,
+    ) -> CV_res:
+        mov_detection_rate, fpr, tpr = self.calc_movement_detection_rate(
+            y_test,
+            y_test_pr,
+            self.mov_detection_threshold,
+            self.min_consequent_count,
+        )
+
+        cv_res.mov_detection_rates_test.append(mov_detection_rate)
+        cv_res.tprate_test.append(tpr)
+        cv_res.fprate_test.append(fpr)
+
+        mov_detection_rate, fpr, tpr = self.calc_movement_detection_rate(
+            y_train,
+            y_train_pr,
+            self.mov_detection_threshold,
+            self.min_consequent_count,
+        )
+
+        cv_res.mov_detection_rates_train.append(mov_detection_rate)
+        cv_res.tprate_train.append(tpr)
+        cv_res.fprate_train.append(fpr)
+
+        return cv_res
+
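`calc_movement_detection_rate` itself is defined elsewhere in this module; the underlying idea, that a movement segment counts as detected once at least `min_consequent_count` consecutive predictions exceed `mov_detection_threshold`, can be sketched as follows (a simplification; the real method additionally returns FPR and TPR):

```python
# Simplified sketch of a movement-detection rate; not the exact
# calc_movement_detection_rate implementation from this module.
import numpy as np

def detection_rate(y_true, y_pred, threshold=0.5, min_consecutive=3):
    """Fraction of true movement segments containing at least
    `min_consecutive` consecutive predictions above `threshold`."""
    y_true = np.asarray(y_true).astype(int)
    above = (np.asarray(y_pred) > threshold).astype(int)
    # start/stop indices of contiguous ground-truth movement blocks
    edges = np.flatnonzero(np.diff(np.r_[0, y_true, 0]))
    starts, stops = edges[::2], edges[1::2]
    detected = 0
    for start, stop in zip(starts, stops):
        seg = above[start:stop]
        # split the thresholded predictions into constant runs
        runs = np.split(seg, np.flatnonzero(np.diff(seg)) + 1)
        if any(r[0] == 1 and len(r) >= min_consecutive for r in runs):
            detected += 1
    return detected / max(len(starts), 1)

print(detection_rate([0, 1, 1, 1, 0], [0.1, 0.9, 0.8, 0.9, 0.2]))  # 1.0
```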
+    def wrapper_model_train(
+        self,
+        X_train,
+        y_train,
+        X_test=None,
+        y_test=None,
+        cv_res: CV_res | None = None,
+        return_fitted_model_only: bool = False,
+        save_data=True,
+    ):
+        if cv_res is None:
+            cv_res = CV_res(
+                get_movement_detection_rate=self.get_movement_detection_rate,
+                RUN_BAY_OPT=self.RUN_BAY_OPT,
+                mrmr_select=self.mrmr_select,
+                model_save=self.model_save,
+            )
+
+        model_train = clone(self.model)
+        if self.STACK_FEATURES_N_SAMPLES:
+            if X_test is not None:
+                X_train, y_train, X_test, y_test = Decoder.append_samples_val(
+                    X_train,
+                    y_train,
+                    X_test,
+                    y_test,
+                    n=self.time_stack_n_samples,
+                )
+            else:
+                X_train, y_train = Decoder.append_previous_n_samples(
+                    X_train, y_train, n=self.time_stack_n_samples
+                )
+
+        if y_train.sum() == 0 or (
+            y_test is not None and y_test.sum() == 0
+        ):  # only one class present
+            raise Decoder.ClassMissingException
+
+        if self.RUN_BAY_OPT:
+            model_train = self.bay_opt_wrapper(model_train, X_train, y_train)
+
+        if self.mrmr_select:
+            from mrmr import mrmr_classif
+
+            if len(self.feature_names) > X_train.shape[1]:
+                # analyze an individual channel
+                columns_names = [
+                    col
+                    for col in self.feature_names
+                    if col.startswith(self.ch_name_tested)
+                ]
+                if self.columns_names_single_ch is None:
+                    self.columns_names_single_ch = [
+                        f[len(self.ch_name_tested) + 1 :] for f in columns_names
+                    ]
+            else:
+                # analyze all channels combined
+                columns_names = self.feature_names
+            X_train = pd.DataFrame(X_train, columns=columns_names)
+            X_test = pd.DataFrame(X_test, columns=columns_names)
+
+            y_train = pd.Series(y_train)
+            selected_features = mrmr_classif(X=X_train, y=y_train, K=20, n_jobs=60)
+
+            X_train = X_train[selected_features]
+            X_test = X_test[selected_features]
+
+        if self.pca:
+            from sklearn.decomposition import PCA
+
+            pca = PCA(n_components=10)
+            pca.fit(X_train)
+            X_train = pca.transform(X_train)
+            X_test = pca.transform(X_test)
+
+        if self.cca:
+            from sklearn.cross_decomposition import CCA
+
+            cca = CCA(n_components=10)
+            cca.fit(X_train, y_train)
+            X_train = cca.transform(X_train)
+            X_test = cca.transform(X_test)
+
+        # fit model on the (possibly stacked and reduced) features
+        model_train = self.fit_model(model_train, X_train, y_train)
+
+        if return_fitted_model_only:
+            return model_train
+
+        cv_res = self.eval_model(
+            model_train, X_train, X_test, y_train, y_test, cv_res, save_data
+        )
+
+        if self.mrmr_select:
+            cv_res.mrmr_select.append(selected_features)
+
+        return cv_res
+
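`append_previous_n_samples` and `append_samples_val` are `Decoder` static helpers defined outside this hunk; the gist of the `STACK_FEATURES_N_SAMPLES` option, concatenating each feature vector with its predecessors so the model sees short-term temporal context, might look roughly like this (our simplification, not the exact helper):

```python
# Rough sketch of the time-stacking idea behind append_previous_n_samples;
# not the exact helper from this module.
import numpy as np

def stack_previous_n(X, y, n=5):
    """Concatenate each sample with its n-1 predecessors; the label of the
    most recent sample is kept."""
    stacked = [X[i - n + 1 : i + 1].flatten() for i in range(n - 1, len(X))]
    return np.asarray(stacked), y[n - 1 :]

X = np.arange(20, dtype=float).reshape(10, 2)
y = np.arange(10)
X_s, y_s = stack_previous_n(X, y, n=3)
print(X_s.shape, y_s.shape)  # (8, 6) (8,)
```

When a test split is present, `append_samples_val` performs the analogous stacking on both partitions.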
+    def run_CV(self, data, label):
+        """Evaluate model performance with the specified cross-validation method.
+
+        Parameters
+        ----------
+        data : np.ndarray
+            features to train and test on, shape (samples, features)
+        label : np.ndarray
+            target to train and test on, shape (samples,)
+        """
+
+        def split_data(data):
+            if self.cv_method == "NonShuffledTrainTestSplit":
+                # keep the outer 10 s at the start and end for training; the
+                # test set is a contiguous block starting at a random
+                # position in between
+                N_samples = data.shape[0]
+                test_area_points = (N_samples - self.sfreq * 10) - (self.sfreq * 10)
+                test_points = int(N_samples * 0.3)
+
+                if test_area_points > test_points:
+                    start_index = np.random.randint(
+                        int(self.sfreq * 10),
+                        int(N_samples - self.sfreq * 10 - test_points),
+                    )
+                    test_index = np.arange(start_index, start_index + test_points)
+                    train_index = np.concatenate(
+                        (
+                            np.arange(0, start_index),
+                            np.arange(start_index + test_points, N_samples),
+                        ),
+                        axis=0,
+                    ).flatten()
+                    yield train_index, test_index
+                else:
+                    # recording too short for the 10 s margins: fall back to
+                    # a single non-shuffled 70/30 split
+                    cv_single_tr_te_split = model_selection.check_cv(
+                        cv=[
+                            model_selection.train_test_split(
+                                np.arange(data.shape[0]),
+                                test_size=0.3,
+                                shuffle=False,
+                            )
+                        ]
+                    )
+                    for (
+                        train_index,
+                        test_index,
+                    ) in cv_single_tr_te_split.split():
+                        yield train_index, test_index
+            else:
+                for train_index, test_index in self.cv_method.split(data):
+                    yield train_index, test_index
+
+        cv_res = self.init_cv_res()
+
+        if self.use_nested_cv:
+            cv_res_inner = self.init_cv_res()
+
+        for train_index, test_index in split_data(data):
+            X_train, y_train = data[train_index, :], label[train_index]
+            X_test, y_test = data[test_index], label[test_index]
+            try:
+                cv_res = self.wrapper_model_train(
+                    X_train, y_train, X_test, y_test, cv_res
+                )
+            except Decoder.ClassMissingException:
+                continue
+
+            if self.use_nested_cv:
+                data_inner = data[train_index]
+                label_inner = label[train_index]
+                for train_index_inner, test_index_inner in split_data(data_inner):
+                    X_train_inner = data_inner[train_index_inner, :]
+                    y_train_inner = label_inner[train_index_inner]
+                    X_test_inner = data_inner[test_index_inner]
+                    y_test_inner = label_inner[test_index_inner]
+                    try:
+                        cv_res_inner = self.wrapper_model_train(
+                            X_train_inner,
+                            y_train_inner,
+                            X_test_inner,
+                            y_test_inner,
+                            cv_res_inner,
+                        )
+                    except Decoder.ClassMissingException:
+                        continue
+
+        self.cv_res = cv_res
+        if self.use_nested_cv:
+            self.cv_res_inner = cv_res_inner
+
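A hedged usage sketch for `run_CV` (the `KFold` splitter and the synthetic data are our choices for illustration; `cv_method` may be any scikit-learn splitter, or the string `"NonShuffledTrainTestSplit"` handled above):

```python
# Sketch: driving run_CV with a scikit-learn splitter on synthetic data.
# `decoder` is assumed to be an already-configured Decoder instance.
import numpy as np
from sklearn import model_selection

data = np.random.randn(1000, 16)                  # (samples, features)
label = (np.random.rand(1000) > 0.8).astype(int)  # binary target

decoder.cv_method = model_selection.KFold(n_splits=3, shuffle=False)
decoder.run_CV(data, label)
print(np.mean(decoder.cv_res.score_test))  # mean held-out score across folds
```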
+    def bay_opt_wrapper(self, model_train, X_train, y_train):
+        """Run Bayesian optimization and set the best parameters on model_train.
+
+        The best parameters are also appended to self.best_bay_opt_params.
+        """
+
+        (
+            X_train_bo,
+            X_test_bo,
+            y_train_bo,
+            y_test_bo,
+        ) = model_selection.train_test_split(
+            X_train, y_train, train_size=0.7, shuffle=False
+        )
+
+        if y_train_bo.sum() == 0 or y_test_bo.sum() == 0:
+            logger.critical("Cannot start Bayesian optimization without labels > 0")
+            raise Decoder.ClassMissingException
+
+        params_bo = self.run_Bay_Opt(
+            X_train_bo, y_train_bo, X_test_bo, y_test_bo, rounds=10
+        )
+
+        # write the best parameters found by Bayesian optimization to the model
+        params_bo_dict = {}
+        for i in range(len(params_bo)):
+            setattr(model_train, self.bay_opt_param_space[i].name, params_bo[i])
+            params_bo_dict[self.bay_opt_param_space[i].name] = params_bo[i]
+
+        self.best_bay_opt_params.append(params_bo_dict)
+
+        return model_train
+
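The `setattr(model_train, self.bay_opt_param_space[i].name, ...)` pattern implies that `bay_opt_param_space` is a list of skopt dimensions whose `name` fields match estimator parameters. A sketch of such a space (the specific parameters are illustrative, not taken from this diff):

```python
# Sketch: a parameter space compatible with bay_opt_wrapper/run_Bay_Opt.
# Dimension names must be settable attributes of the wrapped estimator.
from skopt.space import Integer, Real

bay_opt_param_space = [
    Real(1e-4, 1e2, prior="log-uniform", name="C"),  # e.g. for LogisticRegression
    Integer(100, 1000, name="max_iter"),
]
```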
+    def run_Bay_Opt(
+        self,
+        X_train,
+        y_train,
+        X_test,
+        y_test,
+        rounds=30,
+        base_estimator="GP",
+        acq_func="EI",
+        acq_optimizer="sampling",
+        initial_point_generator="lhs",
+    ):
+        """Run skopt Bayesian optimization.
+
+        skopt.Optimizer:
+        https://scikit-optimize.github.io/stable/modules/generated/skopt.Optimizer.html#skopt.Optimizer
+
+        example:
+        https://scikit-optimize.github.io/stable/auto_examples/ask-and-tell.html#sphx-glr-auto-examples-ask-and-tell-py
+
+        Note that metrics differ in orientation: some are minimized (MAE),
+        others are maximized (r^2), while the optimizer always minimizes
+        the objective.
+
+        Parameters
+        ----------
+        X_train: np.ndarray
+        y_train: np.ndarray
+        X_test: np.ndarray
+        y_test: np.ndarray
+        rounds : int, optional
+            optimization rounds, by default 30
+        base_estimator : str, optional
+            surrogate model used in place of the true objective, by default "GP"
+        acq_func : str, optional
+            function to minimize over the posterior distribution, by default "EI"
+        acq_optimizer : str, optional
+            method to minimize the acquisition function, by default "sampling"
+        initial_point_generator : str, optional
+            sets an initial point generator, by default "lhs"
+
+        Returns
+        -------
+        best parameters found by skopt
+        """
+
+        def get_f_val(model_bo):
+            model_bo = self.fit_model(model_bo, X_train, y_train)
+            return self.eval_method(y_test, model_bo.predict(X_test))
+
+        from skopt import Optimizer
+
+        opt = Optimizer(
+            self.bay_opt_param_space,
+            base_estimator=base_estimator,
+            acq_func=acq_func,
+            acq_optimizer=acq_optimizer,
+            initial_point_generator=initial_point_generator,
+        )
+
+        for _ in range(rounds):
+            next_x = opt.ask()
+            # set candidate parameter values on a fresh clone of the model
+            model_bo = clone(self.model)
+            for i in range(len(next_x)):
+                setattr(model_bo, self.bay_opt_param_space[i].name, next_x[i])
+            f_val = get_f_val(model_bo)
+            res = opt.tell(next_x, f_val)
+            if self.VERBOSE:
+                logger.info(f_val)
+
+        # skopt accumulates all evaluations in res; res.x holds the best parameters
+        return res.x
+
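Because `skopt.Optimizer` always minimizes the objective, an `eval_method` that is maximized (r², balanced accuracy) should be negated before being passed to `opt.tell`, which is what the orientation note in the docstring is about. A hedged helper sketch:

```python
# Sketch: orienting a metric for skopt, which minimizes the objective.
from sklearn import metrics

def as_minimization(metric, maximize: bool):
    """Wrap a metric so that smaller is always better."""
    def wrapped(y_true, y_pred):
        score = metric(y_true, y_pred)
        return -score if maximize else score
    return wrapped

objective = as_minimization(metrics.r2_score, maximize=True)          # negated
loss = as_minimization(metrics.mean_absolute_error, maximize=False)  # unchanged
```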
+    def save(self, feature_path: str, feature_file: str, str_save_add=None) -> None:
+        """Save the decoder object to a pickle file.
+
+        Pickle is used rather than e.g. JSON because the decoder holds
+        fitted model objects that are not JSON-serializable.
+        """
+
+        if str_save_add is None:
+            PATH_OUT = PurePath(feature_path, feature_file, feature_file + "_ML_RES.p")
+        else:
+            PATH_OUT = PurePath(
+                feature_path,
+                feature_file,
+                feature_file + "_" + str_save_add + "_ML_RES.p",
+            )
+
+        logger.info(f"model being saved to: {PATH_OUT}")
+        with open(PATH_OUT, "wb") as output:  # overwrites any existing file
+            pickle.dump(self, output)
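A matching load mirrors the path pattern that `save` writes (the example values for `feature_path`/`feature_file` are placeholders):

```python
# Sketch: restoring a decoder written by Decoder.save().
import pickle
from pathlib import PurePath

feature_path, feature_file = "/path/to/out", "sub-000_run-0"  # placeholders
path_in = PurePath(feature_path, feature_file, feature_file + "_ML_RES.p")
with open(path_in, "rb") as f:
    decoder = pickle.load(f)
```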