nkululeko 0.59.1__py3-none-any.whl → 0.61.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. nkululeko/constants.py +1 -1
  2. nkululeko/experiment.py +43 -43
  3. nkululeko/feature_extractor.py +101 -58
  4. nkululeko/modelrunner.py +14 -14
  5. nkululeko/plots.py +11 -0
  6. nkululeko/segment.py +23 -27
  7. nkululeko/test_predictor.py +1 -1
  8. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/METADATA +13 -1
  9. nkululeko-0.61.0.dist-info/RECORD +31 -0
  10. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/WHEEL +1 -1
  11. nkululeko/ap_age.py +0 -31
  12. nkululeko/ap_arousal.py +0 -30
  13. nkululeko/ap_dominance.py +0 -29
  14. nkululeko/ap_gender.py +0 -29
  15. nkululeko/ap_mos.py +0 -35
  16. nkululeko/ap_pesq.py +0 -35
  17. nkululeko/ap_sdr.py +0 -36
  18. nkululeko/ap_snr.py +0 -35
  19. nkululeko/ap_stoi.py +0 -34
  20. nkululeko/ap_valence.py +0 -30
  21. nkululeko/augmenter.py +0 -64
  22. nkululeko/dataset.py +0 -415
  23. nkululeko/dataset_csv.py +0 -49
  24. nkululeko/dataset_ravdess.py +0 -19
  25. nkululeko/estimate_snr.py +0 -89
  26. nkululeko/feats_agender.py +0 -63
  27. nkululeko/feats_agender_agender.py +0 -65
  28. nkululeko/feats_analyser.py +0 -87
  29. nkululeko/feats_audmodel.py +0 -63
  30. nkululeko/feats_audmodel_dim.py +0 -63
  31. nkululeko/feats_clap.py +0 -74
  32. nkululeko/feats_import.py +0 -44
  33. nkululeko/feats_mld.py +0 -47
  34. nkululeko/feats_mos.py +0 -92
  35. nkululeko/feats_opensmile.py +0 -84
  36. nkululeko/feats_oxbow.py +0 -87
  37. nkululeko/feats_praat.py +0 -72
  38. nkululeko/feats_snr.py +0 -63
  39. nkululeko/feats_squim.py +0 -99
  40. nkululeko/feats_trill.py +0 -74
  41. nkululeko/feats_wav2vec2.py +0 -94
  42. nkululeko/featureset.py +0 -41
  43. nkululeko/feinberg_praat.py +0 -430
  44. nkululeko/loss_ccc.py +0 -28
  45. nkululeko/loss_softf1loss.py +0 -40
  46. nkululeko/model.py +0 -256
  47. nkululeko/model_bayes.py +0 -14
  48. nkululeko/model_cnn.py +0 -118
  49. nkululeko/model_gmm.py +0 -16
  50. nkululeko/model_knn.py +0 -16
  51. nkululeko/model_knn_reg.py +0 -16
  52. nkululeko/model_mlp.py +0 -175
  53. nkululeko/model_mlp_regression.py +0 -197
  54. nkululeko/model_svm.py +0 -18
  55. nkululeko/model_svr.py +0 -18
  56. nkululeko/model_tree.py +0 -14
  57. nkululeko/model_tree_reg.py +0 -14
  58. nkululeko/model_xgb.py +0 -12
  59. nkululeko/model_xgr.py +0 -12
  60. nkululeko/randomsplicer.py +0 -76
  61. nkululeko/randomsplicing.py +0 -74
  62. nkululeko-0.59.1.dist-info/RECORD +0 -82
  63. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/LICENSE +0 -0
  64. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/top_level.txt +0 -0
nkululeko/model.py DELETED
@@ -1,256 +0,0 @@
-# model.py
-from nkululeko.util import Util
-import pandas as pd
-import numpy as np
-import nkululeko.glob_conf as glob_conf
-import sklearn.utils
-from nkululeko.reporter import Reporter
-import ast
-from sklearn.model_selection import GridSearchCV
-import pickle
-import random
-from sklearn.model_selection import LeaveOneGroupOut
-from sklearn.model_selection import StratifiedKFold
-
-
-class Model:
-    """Generic model class for linear (non-neural) algorithms"""
-
-    def __init__(self, df_train, df_test, feats_train, feats_test):
-        """Constructor taking the configuration and all dataframes"""
-        self.df_train, self.df_test, self.feats_train, self.feats_test = df_train, df_test, feats_train, feats_test
-        self.util = Util('model')
-        self.target = self.util.config_val('DATA', 'target', 'emotion')
-        self.run = 0
-        self.epoch = 0
-        self.logo = self.util.config_val('MODEL', 'logo', False)
-        self.xfoldx = self.util.config_val('MODEL', 'k_fold_cross', False)
-
-    def set_testdata(self, data_df, feats_df):
-        self.df_test, self.feats_test = data_df, feats_df
-
-    def reset_test(self, df_test, feats_test):
-        self.df_test, self.feats_test = df_test, feats_test
-
-
-    def set_id(self, run, epoch):
-        self.run = run
-        self.epoch = epoch
-        dir = self.util.get_path('model_dir')
-        name = f'{self.util.get_exp_name(only_train=True)}_{self.run}_{self.epoch:03d}.model'
-        self.store_path = dir+name
-
-
-    def _x_fold_cross(self):
-        # ignore train and test sets and do a x-fold-cross evaluation
-        self.util.debug(f'ignoring splits and doing {self.xfoldx} fold cross validation')
-        feats = pd.concat([self.feats_train, self.feats_test])
-        annos = pd.concat([self.df_train, self.df_test])
-        targets = annos[self.target]
-        _skf = StratifiedKFold(n_splits=int(self.xfoldx))
-        truths, preds, results = [], [], []
-        g_index = 0
-        # leave-one-speaker loop
-        for train_index, test_index in _skf.split(
-            feats,
-            targets,
-        ):
-            train_x = feats.iloc[train_index].to_numpy()
-            train_y = targets[train_index]
-            self.clf.fit(train_x, train_y)
-            truth_x = feats.iloc[test_index].to_numpy()
-            truth_y = targets[test_index]
-            predict_y = self.clf.predict(truth_x)
-            report = Reporter(truth_y.astype(float), predict_y, self.run, self.epoch)
-            self.util.debug(f'result for fold {g_index}: {report.get_result().get_test_result()} ')
-            results.append(float(report.get_result().test))
-            truths.append(truth_y)
-            preds.append(predict_y)
-            g_index += 1
-
-        # combine speaker folds
-        truth = pd.concat(truths)
-        truth.name = 'truth'
-        pred = pd.Series(
-            np.concatenate(preds),
-            index=truth.index,
-            name='prediction',
-        )
-        self.truths = truth
-        self.preds = pred
-        results = np.asarray(results)
-        self.util.debug(f'KFOLD: {self.xfoldx} folds: mean {results.mean():.3f}, std: {results.std():.3f}')
-    def _do_logo(self):
-        # ignore train and test sets and do a "leave one speaker group out" evaluation
-        self.util.debug(f'ignoring splits and doing LOGO with {self.logo} groups')
-        logo = int(self.logo)
-        feats = pd.concat([self.feats_train, self.feats_test])
-        annos = pd.concat([self.df_train, self.df_test])
-        targets = annos[self.target]
-        _logo = LeaveOneGroupOut()
-        truths, preds, results = [], [], []
-        # get unique list of speakers
-        speakers = annos['speaker'].unique()
-        # check for folds columns
-        if not 'fold' in annos.columns:
-            self.util.debug(f'creating random folds for {logo} groups')
-            # create a random dictionary of groups
-            sdict = {}
-            # randomize the speaker order
-            random.shuffle(speakers)
-            folds = list(range(logo))
-            for i, s in enumerate(speakers):
-                sdict[s] = folds[i % len(folds)]
-            # add this to the annotations
-            annos['fold'] = annos['speaker'].apply(lambda x: str(sdict[x]))
-        else:
-            fold_count = annos['fold'].nunique()
-            self.util.debug(f'using existing folds for {fold_count} groups')
-        g_index = 0
-        # leave-one-group loop
-        for train_index, test_index in _logo.split(
-            feats,
-            targets,
-            groups=annos['fold'],
-        ):
-            train_x = feats.iloc[train_index].to_numpy()
-            train_y = targets[train_index]
-            self.clf.fit(train_x, train_y)
-
-            truth_x = feats.iloc[test_index].to_numpy()
-            truth_y = targets[test_index]
-            predict_y = self.clf.predict(truth_x)
-            report = Reporter(truth_y.astype(float), predict_y, self.run, self.epoch)
-            result = report.get_result().get_test_result()
-            self.util.debug(f'result for speaker group {g_index}: {result} ')
-            results.append(float(report.get_result().test))
-            truths.append(truth_y)
-            preds.append(predict_y)
-            g_index += 1
-
-        # combine speaker folds
-        truth = pd.concat(truths)
-        truth.name = 'truth'
-        pred = pd.Series(
-            np.concatenate(preds),
-            index=truth.index,
-            name='prediction',
-        )
-        self.truths = truth
-        self.preds = pred
-        results = np.asarray(results)
-        self.util.debug(f'LOGO: {self.logo} folds: mean {results.mean():.3f}, std: {results.std():.3f}')
-
-    def train(self):
-        """Train the model"""
-        # # first check if the model already has been trained
-        # if os.path.isfile(self.store_path):
-        #     self.load(self.run, self.epoch)
-        #     self.util.debug(f'reusing model: {self.store_path}')
-        #     return
-
-        # then if leave one speaker group out validation is wanted
-        if self.logo:
-            self._do_logo()
-            return
-        # then if x fold cross validation is wanted
-        if self.xfoldx:
-            self._x_fold_cross()
-            return
-
-        # check for NANs in the features
-        # set up the data_loaders
-        if self.feats_train.isna().to_numpy().any():
-            self.util.debug(f'Model, train: replacing {self.feats_train.isna().sum().sum()} NANs with 0')
-            self.feats_train = self.feats_train.fillna(0)
-        # remove labels from features
-        feats = self.feats_train.to_numpy()
-        # compute class weights
-        if self.util.config_val('MODEL', 'class_weight', False):
-            self.classes_weights = sklearn.utils.class_weight.compute_sample_weight(
-                class_weight='balanced',
-                y=self.df_train[self.target]
-            )
-
-        tuning_params = self.util.config_val('MODEL', 'tuning_params', False)
-        if tuning_params:
-            # tune the model meta parameters
-            tuning_params = ast.literal_eval(tuning_params)
-            tuned_params={}
-            try:
-                scoring = glob_conf.config['MODEL']['scoring']
-            except KeyError:
-                self.util.error('got tuning params but no scoring')
-            for param in tuning_params:
-                values = ast.literal_eval(glob_conf.config['MODEL'][param])
-                tuned_params[param] = values
-            self.util.debug(f'tuning on {tuned_params}')
-            self.clf = GridSearchCV(self.clf, tuned_params, refit = True, verbose = 3, scoring=scoring)
-            try:
-                class_weight = self.util.config_val('MODEL', 'class_weight', False)
-                if class_weight:
-                    self.util.debug('using class weight')
-                    self.clf.fit(feats, self.df_train[self.target], sample_weight=self.classes_weights)
-                else:
-                    self.clf.fit(feats, self.df_train[self.target])
-            except KeyError:
-                self.clf.fit(feats, self.df_train[self.target])
-            self.util.debug(f'winner parameters: {self.clf.best_params_}')
-        else:
-            class_weight = self.util.config_val('MODEL', 'class_weight', False)
-            if class_weight:
-                self.util.debug('using class weight')
-                self.clf.fit(feats, self.df_train[self.target], sample_weight=self.classes_weights)
-            else:
-                labels = self.df_train[self.target]
-                self.clf.fit(feats, labels)
-
-    def get_predictions(self):
-        predictions = self.clf.predict(self.feats_test.to_numpy())
-        return predictions
-
-    def predict(self):
-        if self.feats_test.isna().to_numpy().any():
-            self.util.debug(f'Model, test: replacing {self.feats_test.isna().sum().sum()} NANs with 0')
-            self.feats_test = self.feats_test.fillna(0)
-        if self.logo or self.xfoldx:
-            report = Reporter(self.truths.astype(float), self.preds, self.run, self.epoch)
-            return report
-        """Predict the whole eval feature set"""
-        predictions = self.get_predictions()
-        report = Reporter(self.df_test[self.target]\
-            .to_numpy().astype(float), predictions, self.run, self.epoch)
-        return report
-
-    def predict_sample(self, features):
-        """Predict one sample"""
-        prediction = {}
-        if self.util.exp_is_classification():
-            # get the class probabilities
-            predictions = self.clf.predict_proba(features)
-            # pred = self.clf.predict(features)
-            for i in range(len(self.clf.classes_)):
-                cat = self.clf.classes_[i]
-                prediction[cat] = predictions[0][i]
-        else:
-            predictions = self.clf.predict(features)
-            prediction['result'] = predictions[0]
-        return prediction
-
-
-    def store(self):
-        with open(self.store_path, 'wb') as handle:
-            pickle.dump(self.clf, handle)
-
-
-    def load(self, run, epoch):
-        self.set_id(run, epoch)
-        dir = self.util.get_path('model_dir')
-        name = f'{self.util.get_exp_name(only_train=True)}_{self.run}_{self.epoch:03d}.model'
-        with open(dir+name, 'rb') as handle:
-            self.clf = pickle.load(handle)
-
-    def load_path(self, path, run, epoch):
-        self.set_id(run, epoch)
-        with open(path, 'rb') as handle:
-            self.clf = pickle.load(handle)
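For context, the core of the deleted `Model` class is the pooled cross-validation it runs when `logo` or `k_fold_cross` is configured: train and test splits are concatenated, one classifier is fitted per fold, and the per-fold results are averaged. A minimal stand-alone sketch of that pattern, with toy data and an `SVC` standing in for the configured classifier (names here are illustrative, not nkululeko API):

```python
import numpy as np
from sklearn.datasets import make_classification
from sklearn.metrics import recall_score
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC

# toy stand-ins for the pooled features and targets built in _x_fold_cross()
feats, targets = make_classification(n_samples=120, n_classes=2, random_state=42)
results = []
for train_idx, test_idx in StratifiedKFold(n_splits=5).split(feats, targets):
    clf = SVC().fit(feats[train_idx], targets[train_idx])
    preds = clf.predict(feats[test_idx])
    # stand-in for Reporter's configured measure (macro recall, i.e. UAR, here)
    results.append(recall_score(targets[test_idx], preds, average='macro'))
results = np.asarray(results)
print(f'5 folds: mean {results.mean():.3f}, std: {results.std():.3f}')
```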
nkululeko/model_bayes.py DELETED
@@ -1,14 +0,0 @@
-# model_bayes.py
-
-from sklearn.naive_bayes import GaussianNB
-from nkululeko.model import Model
-
-class Bayes_model(Model):
-    is_classifier = True
-
-    """An SVM model"""
-    def __init__(self, df_train, df_test, feats_train, feats_test):
-        super().__init__(df_train, df_test, feats_train, feats_test)
-        c = float(self.util.config_val('MODEL', 'C_val', '0.001'))
-        self.clf = GaussianNB() # set up the classifier
-
nkululeko/model_cnn.py DELETED
@@ -1,118 +0,0 @@
-# cnnmodel.py
-
-from sklearn.utils import resample
-from nkululeko.model import Model
-import torch
-from sklearn.metrics import mean_squared_error
-import nkululeko.glob_conf as glob_conf
-from nkululeko.reporter import Reporter
-import numpy as np
-from loss_ccc import ConcordanceCorCoeff
-
-class CNN_model(Model):
-    """A CNN (convolutional neural net) model"""
-    is_classifier = True
-
-    def __init__(self, df_train, df_test, feats_train, feats_test):
-        """Constructor taking the configuration and all dataframes"""
-        Model.__init__(self, df_train, df_test, feats_train, feats_test)
-        self.util.debug(f'initializing model')
-        self.device = glob_conf.config['MODEL']['device']
-        pretrained_state = self.util.config_val('MODEL', 'pre_train', 'gender_state.pth.tar')
-        state = torch.load(pretrained_state)
-        state.pop('out.gender.weight')
-        state.pop('out.gender.bias')
-        state['fc1.weight'] = state.pop('fc1.gender.weight')
-        state['fc1.bias'] = state.pop('fc1.gender.bias')
-        model = audpann.Cnn10(sampling_rate=16000, output_dim=1)
-        model.load_state_dict(state, strict=False)
-        self.model = model.to(self.device)
-        self.loss_func = self.util.config_val('MODEL', 'loss', 'mse')
-        if self.loss_func == 'mse':
-            self.criterion = torch.nn.MSELoss()
-        elif self.loss_func == '1-ccc':
-            self.criterion = ConcordanceCorCoeff()
-        else:
-            self.util.error(f'unknown loss function: {self.loss_func}')
-        self.util.debug(f'training model with {self.loss_func} loss function')
-        self.learning_rate = float(self.util.config_val('MODEL', 'learning_rate', 0.0001))
-        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
-
-    def train(self):
-        """Train the model one epoch"""
-        losses = []
-        # first check if the model already has been trained
-        # if os.path.isfile(self.store_path):
-        #     self.load(self.run, self.epoch)
-        #     self.util.debug(f'reusing model: {self.store_path}')
-        #     return
-
-        self.model.train()
-
-        for features, labels in self.feats_train:
-            logits = self.model(features.to(self.device).float()).squeeze(1)
-            loss = self.criterion(logits, labels.float().to(self.device))
-            losses.append(loss.item())
-            self.optimizer.zero_grad()
-            loss.backward()
-            self.optimizer.step()
-        self.loss = (np.asarray(losses)).mean()
-
-    def predict(self):
-        """Predict the whole eval feature set"""
-        # evaluate on dev set
-        _, truths, predictions = self.evaluate_model(False)
-        # evaluate on train set, if there is one
-        result = 0
-        if self.feats_train != None:
-            result, _, _ = self.evaluate_model(True)
-        report = Reporter(truths.numpy(), predictions.numpy(), self.run, self.epoch)
-        try:
-            report.result.loss = self.loss
-        except AttributeError: # if the model was loaded from disk the loss is unknown
-            pass
-        report.result.train = result
-        return report
-
-
-    def evaluate_model(self, on_train=False):
-        if on_train:
-            loader = self.feats_train
-        else:
-            loader = self.feats_test
-        logits = torch.zeros(len(loader.dataset))
-        targets = torch.zeros(len(loader.dataset))
-        self.model.eval()
-        with torch.no_grad():
-            for index, (features, labels) in enumerate(loader):
-                start_index = index * loader.batch_size
-                end_index = (index + 1) * loader.batch_size
-                if end_index > len(loader.dataset):
-                    end_index = len(loader.dataset)
-                logits[start_index:end_index] = self.model(features.to(self.device).float()).squeeze(1)
-                targets[start_index:end_index] = labels
-
-        measure = self.util.config_val('MODEL', 'measure', 'mse')
-        if measure == 'mse':
-            result = mean_squared_error(targets.numpy(), logits.numpy())
-        elif measure == 'ccc':
-            result = Reporter.ccc(targets.numpy(), logits.numpy())
-        else:
-            self.util.error(f'unknown measure: {measure}')
-        return result, targets, logits
-
-    def store(self):
-        torch.save(self.model.state_dict(), self.store_path)
-        self.device = self.util.config_val('MODEL', 'device', 'cpu')
-        # self.model.to(self.device)
-
-    def load(self, run, epoch):
-        self.set_id(run, epoch)
-        dir = self.util.get_path('model_dir')
-        name = f'{self.util.get_exp_name(only_train=True)}_{run}_{epoch:03d}.model'
-        self.device = self.util.config_val('MODEL', 'device', 'cpu')
-        self.store_path = dir+name
-        self.model = audpann.Cnn10(sampling_rate=16000, output_dim=1)
-        state_dict = torch.load(dir+name, map_location='cpu')
-        self.model.load_state_dict(state_dict, strict=False)
-        self.model.to(self.device)
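The `1-ccc` loss above was imported from nkululeko/loss_ccc.py (item 44 in the file list, removed as well). A stand-alone sketch of such a criterion, assuming the standard definition of Lin's concordance correlation coefficient; this is an illustration, not the package's exact implementation:

```python
import torch

class ConcordanceCorCoeff(torch.nn.Module):
    """Loss = 1 - CCC; perfect concordance between prediction and truth gives 0."""

    def forward(self, prediction, ground_truth):
        mean_pred = torch.mean(prediction)
        mean_gt = torch.mean(ground_truth)
        var_pred = torch.var(prediction, unbiased=False)
        var_gt = torch.var(ground_truth, unbiased=False)
        covariance = torch.mean((prediction - mean_pred) * (ground_truth - mean_gt))
        # Lin's CCC: 2*cov / (var_x + var_y + (mean_x - mean_y)^2)
        ccc = 2 * covariance / (var_pred + var_gt + (mean_pred - mean_gt) ** 2)
        return 1 - ccc
```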
nkululeko/model_gmm.py DELETED
@@ -1,16 +0,0 @@
-# model_gmm.py
-
-from sklearn import mixture
-from nkululeko.model import Model
-
-class GMM_model(Model):
-    """An GMM model"""
-
-    is_classifier = True
-    def __init__(self, df_train, df_test, feats_train, feats_test):
-        super().__init__(df_train, df_test, feats_train, feats_test)
-        n_components = int(self.util.config_val('MODEL', 'GMM_components', '4'))
-        covariance_type = self.util.config_val('MODEL', 'GMM_covariance_type', 'full')
-        self.clf = mixture.GaussianMixture(n_components=n_components, covariance_type=covariance_type)
-        # set up the classifier
-
nkululeko/model_knn.py DELETED
@@ -1,16 +0,0 @@
-# model_knn.py
-
-from sklearn.neighbors import KNeighborsClassifier
-from nkululeko.model import Model
-
-class KNN_model(Model):
-    """An KNN model"""
-
-    is_classifier = True
-
-    def __init__(self, df_train, df_test, feats_train, feats_test):
-        super().__init__(df_train, df_test, feats_train, feats_test)
-        method = self.util.config_val('MODEL', 'KNN_weights', 'uniform')
-        k = int(self.util.config_val('MODEL', 'K_val', '5'))
-        self.clf = KNeighborsClassifier(n_neighbors=k, weights=method) # set up the classifier
-
nkululeko/model_knn_reg.py DELETED
@@ -1,16 +0,0 @@
-# model_knn_reg.py
-
-from sklearn.neighbors import KNeighborsRegressor
-from nkululeko.model import Model
-
-class KNN_reg_model(Model):
-    """An KNN model"""
-
-    is_classifier = False
-
-    def __init__(self, df_train, df_test, feats_train, feats_test):
-        super().__init__(df_train, df_test, feats_train, feats_test)
-        method = self.util.config_val('MODEL', 'KNN_weights', 'uniform')
-        k = int(self.util.config_val('MODEL', 'K_val', '5'))
-        self.clf = KNeighborsRegressor(n_neighbors=k, weights=method) # set up the classifier
-
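The small wrappers above (Bayes, GMM, KNN classifier and regressor) all follow one pattern: read hyperparameters from the `[MODEL]` config section, instantiate a sklearn estimator as `self.clf`, and let the generic `Model.train()` fit it. A condensed illustration with made-up config values (`make_knn` is a hypothetical helper, not part of nkululeko):

```python
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

def make_knn(config, classify=True):
    """Instantiate a KNN estimator from [MODEL]-style options."""
    weights = config.get('KNN_weights', 'uniform')  # 'uniform' or 'distance'
    k = int(config.get('K_val', '5'))
    cls = KNeighborsClassifier if classify else KNeighborsRegressor
    return cls(n_neighbors=k, weights=weights)

clf = make_knn({'KNN_weights': 'distance', 'K_val': '7'})  # classifier
reg = make_knn({}, classify=False)                         # regressor with defaults
```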
nkululeko/model_mlp.py DELETED
@@ -1,175 +0,0 @@
-# model_mlp.py
-from nkululeko.util import Util
-import nkululeko.glob_conf as glob_conf
-from nkululeko.model import Model
-from nkululeko.reporter import Reporter
-import torch
-import ast
-import numpy as np
-from sklearn.metrics import recall_score
-from collections import OrderedDict
-from nkululeko.loss_softf1loss import SoftF1Loss
-
-class MLP_model(Model):
-    """MLP = multi layer perceptron"""
-
-    is_classifier = True
-
-    def __init__(self, df_train, df_test, feats_train, feats_test):
-        """Constructor taking the configuration and all dataframes"""
-        super().__init__(df_train, df_test, feats_train, feats_test)
-        self.target = glob_conf.config['DATA']['target']
-        labels = ast.literal_eval(glob_conf.config['DATA']['labels'])
-        self.class_num = len(labels)
-        # set up loss criterion
-        criterion = self.util.config_val('MODEL', 'loss', 'cross')
-        if criterion == 'cross':
-            self.criterion = torch.nn.CrossEntropyLoss()
-        elif criterion == 'f1':
-            self.criterion = SoftF1Loss(num_classes=self.class_num, weight=None, epsilon=1e-7)
-        else:
-            self.util.error(f'unknown loss function: {criterion}')
-        self.util.debug(f'using model with cross entropy loss function')
-        # set up the model
-        self.device = self.util.config_val('MODEL', 'device', 'cpu')
-        layers_string = glob_conf.config['MODEL']['layers']
-        self.util.debug(f'using layers {layers_string}')
-        layers = ast.literal_eval(layers_string)
-        # with dropout?
-        drop = self.util.config_val('MODEL', 'drop', False)
-        if drop:
-            self.util.debug(f'init: training with dropout: {drop}')
-        self.model = self.MLP(feats_train.shape[1], layers, self.class_num, drop).to(self.device)
-        self.learning_rate = float(self.util.config_val('MODEL', 'learning_rate', 0.0001))
-        # set up regularization
-        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
-        # batch size
-        self.batch_size = int(self.util.config_val('MODEL', 'batch_size', 8))
-        # number of parallel processes
-        self.num_workers = int(self.util.config_val('MODEL', 'num_workers', 5))
-        if feats_train.isna().to_numpy().any():
-            self.util.debug(f'Model, train: replacing {feats_train.isna().sum().sum()} NANs with 0')
-            feats_train = feats_train.fillna(0)
-        if feats_test.isna().to_numpy().any():
-            self.util.debug(f'Model, test: replacing {feats_test.isna().sum().sum()} NANs with 0')
-            feats_test = feats_test.fillna(0)
-        # set up the data_loaders
-        self.trainloader = self.get_loader(feats_train, df_train, True)
-        self.testloader = self.get_loader(feats_test, df_test, False)
-
-    def set_testdata(self, data_df, feats_df):
-        self.testloader = self.get_loader(feats_df, data_df, False)
-
-    def reset_test(self, df_test, feats_test):
-        self.testloader = self.get_loader(feats_test, df_test, False)
-
-    def train(self):
-        self.model.train()
-        losses = []
-        for features, labels in self.trainloader:
-            logits = self.model(features.to(self.device))
-            loss = self.criterion(logits, labels.to(self.device, dtype=torch.int64))
-            losses.append(loss.item())
-            self.optimizer.zero_grad()
-            loss.backward()
-            self.optimizer.step()
-        self.loss = (np.asarray(losses)).mean()
-
-    def predict(self):
-        _, truths, predictions = self.evaluate_model(self.model, self.testloader, self.device)
-        uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
-        report = Reporter(truths, predictions, self.run, self.epoch)
-        try:
-            report.result.loss = self.loss
-        except AttributeError: # if the model was loaded from disk the loss is unknown
-            pass
-        report.result.train = uar
-        return report
-
-    def get_predictions(self):
-        _, truths, predictions = self.evaluate_model(self.model, self.testloader, self.device)
-        return predictions.numpy()
-
-    def get_loader(self, df_x, df_y, shuffle):
-        data=[]
-        for i in range(len(df_x)):
-            data.append([df_x.values[i], df_y[self.target][i]])
-        return torch.utils.data.DataLoader(data, shuffle=shuffle, batch_size=self.batch_size)
-
-    class MLP(torch.nn.Module):
-        def __init__(self, i, layers, o, drop):
-            super().__init__()
-            sorted_layers = sorted(layers.items(), key=lambda x: x[1])
-            layers = OrderedDict()
-            layers['0'] = torch.nn.Linear(i, sorted_layers[0][1])
-            layers['0_r'] = torch.nn.ReLU()
-            for i in range(0, len(sorted_layers)-1):
-                layers[str(i+1)] = torch.nn.Linear(sorted_layers[i][1], sorted_layers[i+1][1])
-                if drop:
-                    layers[str(i)+'_d'] = torch.nn.Dropout(float(drop))
-                layers[str(i)+'_r'] = torch.nn.ReLU()
-            layers[str(len(sorted_layers)+1)] = torch.nn.Linear(sorted_layers[-1][1], o)
-            self.linear = torch.nn.Sequential(layers)
-        def forward(self, x):
-            # x: (batch_size, channels, samples)
-            x = x.squeeze(dim=1).float()
-            return self.linear(x)
-
-
-    def evaluate_model(self, model, loader, device):
-        logits = torch.zeros(len(loader.dataset), self.class_num)
-        targets = torch.zeros(len(loader.dataset))
-        model.eval()
-        with torch.no_grad():
-            for index, (features, labels) in enumerate(loader):
-                start_index = index * loader.batch_size
-                end_index = (index + 1) * loader.batch_size
-                if end_index > len(loader.dataset):
-                    end_index = len(loader.dataset)
-                logits[start_index:end_index, :] = model(features.to(device))
-                targets[start_index:end_index] = labels
-
-        predictions = logits.argmax(dim=1)
-        uar = recall_score(targets.numpy(), predictions.numpy(), average='macro')
-        return uar, targets, predictions
-
-    def predict_sample(self, features):
-        """Predict one sample"""
-        with torch.no_grad():
-            logits = self.model(torch.from_numpy(features).to(self.device))
-        a = logits.numpy()
-        res = {}
-        for i in range(len(a[0])):
-            res[i] = a[0][i]
-        return res
-
-    def store(self):
-        torch.save(self.model.state_dict(), self.store_path)
-
-    def load(self, run, epoch):
-        self.set_id(run, epoch)
-        dir = self.util.get_path('model_dir')
-        # name = f'{self.util.get_exp_name()}_{run}_{epoch:03d}.model'
-        name = f'{self.util.get_exp_name(only_train=True)}_{self.run}_{self.epoch:03d}.model'
-        self.device = self.util.config_val('MODEL', 'device', 'cpu')
-        layers = ast.literal_eval(glob_conf.config['MODEL']['layers'])
-        self.store_path = dir+name
-        drop = self.util.config_val('MODEL', 'drop', False)
-        if drop:
-            self.util.debug(f'loading: dropout set to: {drop}')
-        self.model = self.MLP(self.feats_train.shape[1], layers, self.class_num, drop).to(self.device)
-        self.model.load_state_dict(torch.load(self.store_path))
-        self.model.eval()
-
-    def load_path(self, path, run, epoch):
-        self.set_id(run, epoch)
-        with open(path, 'rb') as handle:
-            self.device = self.util.config_val('MODEL', 'device', 'cpu')
-            layers = ast.literal_eval(glob_conf.config['MODEL']['layers'])
-            self.store_path = path
-            drop = self.util.config_val('MODEL', 'drop', False)
-            if drop:
-                self.util.debug(f'dropout set to: {drop}')
-            self.model = self.MLP(self.feats_train.shape[1], layers, self.class_num, drop).to(self.device)
-            self.model.load_state_dict(torch.load(self.store_path))
-            self.model.eval()
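The nested `MLP` module above builds its network from the `[MODEL] layers` option, a dict literal whose values are hidden-layer sizes; note that the deleted code orders hidden layers by ascending size. A minimal sketch of that construction with illustrative values (`build_mlp` is a hypothetical helper, not nkululeko API):

```python
import ast
from collections import OrderedDict

import torch

def build_mlp(n_features, layers_string, n_classes):
    # parse the dict literal from the config, e.g. "{'l1': 64, 'l2': 16}"
    sizes = [v for _, v in sorted(ast.literal_eval(layers_string).items(),
                                  key=lambda x: x[1])]
    blocks = OrderedDict()
    prev = n_features
    for idx, size in enumerate(sizes):
        blocks[f'lin{idx}'] = torch.nn.Linear(prev, size)
        blocks[f'relu{idx}'] = torch.nn.ReLU()
        prev = size
    blocks['out'] = torch.nn.Linear(prev, n_classes)  # logits for CrossEntropyLoss
    return torch.nn.Sequential(blocks)

# e.g. 88 input features (the eGeMAPS functionals), two hidden layers, 4 classes
net = build_mlp(88, "{'l1': 64, 'l2': 16}", 4)
print(net)  # Linear(88->16) -> ReLU -> Linear(16->64) -> ReLU -> Linear(64->4)
```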