nkululeko 0.84.1__py3-none-any.whl → 0.85.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.84.1"
1
+ VERSION="0.85.0"
2
2
  SAMPLING_RATE = 16000
nkululeko/experiment.py CHANGED
@@ -340,7 +340,12 @@ class Experiment:
340
340
  df_train, df_test = self.df_train, self.df_test
341
341
  feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
342
342
  self.feats_test, self.feats_train = pd.DataFrame(), pd.DataFrame()
343
- feats_types = self.util.config_val_list("FEATS", "type", ["os"])
343
+ feats_types = self.util.config_val_list("FEATS", "type", [])
344
+ # for some models no features are needed
345
+ if len(feats_types) == 0:
346
+ self.util.debug("no feature extractor specified.")
347
+ self.feats_train, self.feats_test = pd.DataFrame(), pd.DataFrame()
348
+ return
344
349
  self.feature_extractor = FeatureExtractor(
345
350
  df_train, feats_types, feats_name, "train"
346
351
  )
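With this change the FEATS type defaults to an empty list, so an experiment can run without any feature extraction; the new "finetune" model type registered further down in this diff relies on that, since it consumes the raw audio directly. A minimal, hypothetical configuration sketch (written with configparser; the database name and epoch count are illustrative placeholders, only the section and key names are taken from this diff):

    import configparser

    config = configparser.ConfigParser()
    config.read_dict(
        {
            "EXP": {"epochs": "5"},  # epochs handed to the trainer
            "DATA": {"databases": "['emodb']", "target": "emotion"},  # placeholder database
            # no [FEATS] section: the new default is an empty feature list,
            # so no feature extractor is run
            "MODEL": {"type": "finetune"},  # dispatches to models.model_tuned.Pretrained_model
        }
    )
    with open("exp_finetune.ini", "w") as fp:
        config.write(fp)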
nkululeko/feat_extract/feats_whisper.py CHANGED
@@ -32,22 +32,19 @@ class Whisper(Featureset):
32
32
  model_name = f"openai/{self.feat_type}"
33
33
  self.model = WhisperModel.from_pretrained(model_name).to(self.device)
34
34
  print(f"initialized Whisper model on {self.device}")
35
- self.feature_extractor = AutoFeatureExtractor.from_pretrained(
36
- model_name)
35
+ self.feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
37
36
  self.model_initialized = True
38
37
 
39
38
  def extract(self):
40
39
  """Extract the features or load them from disk if present."""
41
40
  store = self.util.get_path("store")
42
41
  storage = f"{store}{self.name}.pkl"
43
- extract = self.util.config_val(
44
- "FEATS", "needs_feature_extraction", False)
42
+ extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
45
43
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
46
44
  if extract or no_reuse or not os.path.isfile(storage):
47
45
  if not self.model_initialized:
48
46
  self.init_model()
49
- self.util.debug(
50
- "extracting whisper embeddings, this might take a while...")
47
+ self.util.debug("extracting whisper embeddings, this might take a while...")
51
48
  emb_series = []
52
49
  for (file, start, end), _ in audeer.progress_bar(
53
50
  self.data_df.iterrows(),
nkululeko/modelrunner.py CHANGED
@@ -47,16 +47,12 @@ class Modelrunner:
47
47
  highest = 0
48
48
  else:
49
49
  highest = 100000
50
- # for all epochs
51
- for epoch in range(epoch_num):
52
- if only_test:
53
- self.model.load(self.run, epoch)
54
- self.util.debug(f"reusing model: {self.model.store_path}")
55
- self.model.reset_test(self.df_test, self.feats_test)
56
- else:
57
- self.model.set_id(self.run, epoch)
58
- self.model.train()
50
+ if self.model.model_type == "finetuned":
51
+ # epochs are handled by the Hugging Face API
52
+ self.model.train()
59
53
  report = self.model.predict()
54
+ # todo: find out the best epoch
55
+ epoch = epoch_num
60
56
  report.set_id(self.run, epoch)
61
57
  plot_name = self.util.get_plot_name() + f"_{self.run}_{epoch:03d}_cnf"
62
58
  reports.append(report)
@@ -67,32 +63,53 @@ class Modelrunner:
67
63
  if plot_epochs:
68
64
  self.util.debug(f"plotting conf matrix to {plot_name}")
69
65
  report.plot_confmatrix(plot_name, epoch)
70
- store_models = self.util.config_val("EXP", "save", False)
71
- plot_best_model = self.util.config_val("PLOT", "best_model", False)
72
- if (store_models or plot_best_model) and (
73
- not only_test
74
- ): # in any case the model needs to be stored to disk.
75
- self.model.store()
76
- if patience:
77
- patience = int(patience)
78
- result = report.result.get_result()
79
- if self.util.high_is_good():
80
- if result > highest:
81
- highest = result
82
- patience_counter = 0
83
- else:
84
- patience_counter += 1
66
+ else:
67
+ # for all epochs
68
+ for epoch in range(epoch_num):
69
+ if only_test:
70
+ self.model.load(self.run, epoch)
71
+ self.util.debug(f"reusing model: {self.model.store_path}")
72
+ self.model.reset_test(self.df_test, self.feats_test)
85
73
  else:
86
- if result < highest:
87
- highest = result
88
- patience_counter = 0
74
+ self.model.set_id(self.run, epoch)
75
+ self.model.train()
76
+ report = self.model.predict()
77
+ report.set_id(self.run, epoch)
78
+ plot_name = self.util.get_plot_name() + f"_{self.run}_{epoch:03d}_cnf"
79
+ reports.append(report)
80
+ self.util.debug(
81
+ f"run: {self.run} epoch: {epoch}: result: "
82
+ f"{reports[-1].get_result().get_test_result()}"
83
+ )
84
+ if plot_epochs:
85
+ self.util.debug(f"plotting conf matrix to {plot_name}")
86
+ report.plot_confmatrix(plot_name, epoch)
87
+ store_models = self.util.config_val("EXP", "save", False)
88
+ plot_best_model = self.util.config_val("PLOT", "best_model", False)
89
+ if (store_models or plot_best_model) and (
90
+ not only_test
91
+ ): # in any case the model needs to be stored to disk.
92
+ self.model.store()
93
+ if patience:
94
+ patience = int(patience)
95
+ result = report.result.get_result()
96
+ if self.util.high_is_good():
97
+ if result > highest:
98
+ highest = result
99
+ patience_counter = 0
100
+ else:
101
+ patience_counter += 1
89
102
  else:
90
- patience_counter += 1
91
- if patience_counter >= patience:
92
- self.util.debug(
93
- f"reached patience ({str(patience)}): early stopping"
94
- )
95
- break
103
+ if result < highest:
104
+ highest = result
105
+ patience_counter = 0
106
+ else:
107
+ patience_counter += 1
108
+ if patience_counter >= patience:
109
+ self.util.debug(
110
+ f"reached patience ({str(patience)}): early stopping"
111
+ )
112
+ break
96
113
 
97
114
  if not plot_epochs:
98
115
  # Do at least one confusion matrix plot
@@ -133,6 +150,12 @@ class Modelrunner:
133
150
  self.model = Bayes_model(
134
151
  self.df_train, self.df_test, self.feats_train, self.feats_test
135
152
  )
153
+ elif model_type == "finetune":
154
+ from nkululeko.models.model_tuned import Pretrained_model
155
+
156
+ self.model = Pretrained_model(
157
+ self.df_train, self.df_test, self.feats_train, self.feats_test
158
+ )
136
159
  elif model_type == "gmm":
137
160
  from nkululeko.models.model_gmm import GMM_model
138
161
 
nkululeko/models/finetune_model.py CHANGED
@@ -1,3 +1,7 @@
1
+ """
2
+ Code based on @jwagner
3
+ """
4
+
1
5
  import dataclasses
2
6
  import typing
3
7
 
@@ -148,6 +152,11 @@ class Model(Wav2Vec2PreTrainedModel):
148
152
  logits_cat=logits_cat,
149
153
  )
150
154
 
155
+ def predict(self, signal):
156
+ result = self(torch.from_numpy(signal))
157
+ result = result[0].detach().numpy()[0]
158
+ return result
159
+
151
160
 
152
161
  class ModelWithPreProcessing(Model):
153
162
 
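The predict() helper added above runs a forward pass on a raw numpy signal and returns the scores for the first item in the batch. A hedged usage sketch, assuming a checkpoint directory previously written by trainer.save_model() (the path and test file are placeholders); it mirrors the call pattern added to test_pretrain.py later in this diff:

    import audiofile
    import nkululeko.models.finetune_model as fm

    torch_root = "model/torch"  # placeholder: directory written by trainer.save_model()
    model = fm.Model.from_pretrained(torch_root)  # reuses the config.json stored there
    signal, _ = audiofile.read("./test.wav", always_2d=True)  # expects 16 kHz audio
    scores = model.predict(signal)  # in eval mode, forward() applies a softmax over the classes
    print(scores.argmax())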
nkululeko/models/model.py CHANGED
@@ -39,7 +39,7 @@ class Model:
39
39
  self.model_type = type
40
40
 
41
41
  def is_ann(self):
42
- if self.model_type == "ann":
42
+ if (self.model_type == "ann") or (self.model_type == "finetuned"):
43
43
  return True
44
44
  else:
45
45
  return False
nkululeko/models/model_tuned.py ADDED
@@ -0,0 +1,506 @@
1
+ """
2
+ Code based on @jwagner
3
+ """
4
+
5
+ import audiofile
6
+ import audeer
7
+ import audmetric
8
+ import datasets
9
+ import pandas as pd
10
+ import transformers
11
+ from nkululeko.utils.util import Util
12
+ import nkululeko.glob_conf as glob_conf
13
+ from nkululeko.models.model import Model as BaseModel
14
+
15
+ # import nkululeko.models.finetune_model as fm
16
+ from nkululeko.reporting.reporter import Reporter
17
+ import torch
18
+ import ast
19
+ import numpy as np
20
+ from sklearn.metrics import recall_score
21
+ from collections import OrderedDict
22
+ import os
23
+ import json
24
+ import pickle
25
+ import dataclasses
26
+ import typing
27
+
28
+ import torch
29
+ import transformers
30
+ from transformers.models.wav2vec2.modeling_wav2vec2 import (
31
+ Wav2Vec2PreTrainedModel,
32
+ Wav2Vec2Model,
33
+ )
34
+
35
+
36
+ class Pretrained_model(BaseModel):
37
+
38
+ is_classifier = True
39
+
40
+ def __init__(self, df_train, df_test, feats_train, feats_test):
41
+ """Constructor taking the configuration and all dataframes"""
42
+ super().__init__(df_train, df_test, feats_train, feats_test)
43
+ super().set_model_type("ann")
44
+ self.name = "finetuned_wav2vec2"
45
+ self.model_type = "finetuned"
46
+ self.target = glob_conf.config["DATA"]["target"]
47
+ labels = glob_conf.labels
48
+ self.class_num = len(labels)
49
+ device = self.util.config_val("MODEL", "device", "cpu")
50
+ self.batch_size = int(self.util.config_val("MODEL", "batch_size", "8"))
51
+ if device != "cpu":
52
+ self.util.debug(f"running on device {device}")
53
+ os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
54
+ os.environ["CUDA_VISIBLE_DEVICES"] = device
55
+ self.df_train, self.df_test = df_train, df_test
56
+ self.epoch_num = int(self.util.config_val("EXP", "epochs", 1))
57
+
58
+ self._init_model()
59
+
60
+ def _init_model(self):
61
+ model_path = "facebook/wav2vec2-large-robust-ft-swbd-300h"
62
+ self.num_layers = None
63
+ self.sampling_rate = 16000
64
+ self.max_duration_sec = 8.0
65
+ self.accumulation_steps = 4
66
+ # create dataset
67
+
68
+ dataset = {}
69
+ target_name = glob_conf.target
70
+ data_sources = {
71
+ "train": pd.DataFrame(self.df_train[target_name]),
72
+ "dev": pd.DataFrame(self.df_test[target_name]),
73
+ }
74
+
75
+ for split in ["train", "dev"]:
76
+ df = data_sources[split]
77
+ df[target_name] = df[target_name].astype("float")
78
+
79
+ y = pd.Series(
80
+ data=df.itertuples(index=False, name=None),
81
+ index=df.index,
82
+ dtype=object,
83
+ name="labels",
84
+ )
85
+
86
+ y.name = "targets"
87
+ df = y.reset_index()
88
+ df.start = df.start.dt.total_seconds()
89
+ df.end = df.end.dt.total_seconds()
90
+
91
+ # print(f"{split}: {len(df)}")
92
+
93
+ ds = datasets.Dataset.from_pandas(df)
94
+ dataset[split] = ds
95
+
96
+ self.dataset = datasets.DatasetDict(dataset)
97
+
98
+ # load pre-trained model
99
+ le = glob_conf.label_encoder
100
+ mapping = dict(zip(le.classes_, range(len(le.classes_))))
101
+ target_mapping = {k: int(v) for k, v in mapping.items()}
102
+ target_mapping_reverse = {value: key for key, value in target_mapping.items()}
103
+
104
+ self.config = transformers.AutoConfig.from_pretrained(
105
+ model_path,
106
+ num_labels=len(target_mapping),
107
+ label2id=target_mapping,
108
+ id2label=target_mapping_reverse,
109
+ finetuning_task=target_name,
110
+ )
111
+ if self.num_layers is not None:
112
+ self.config.num_hidden_layers = self.num_layers
113
+ setattr(self.config, "sampling_rate", self.sampling_rate)
114
+ setattr(self.config, "data", self.util.get_data_name())
115
+
116
+ vocab_dict = {}
117
+ with open("vocab.json", "w") as vocab_file:
118
+ json.dump(vocab_dict, vocab_file)
119
+ tokenizer = transformers.Wav2Vec2CTCTokenizer("./vocab.json")
120
+ tokenizer.save_pretrained(".")
121
+
122
+ feature_extractor = transformers.Wav2Vec2FeatureExtractor(
123
+ feature_size=1,
124
+ sampling_rate=16000,
125
+ padding_value=0.0,
126
+ do_normalize=True,
127
+ return_attention_mask=True,
128
+ )
129
+ self.processor = transformers.Wav2Vec2Processor(
130
+ feature_extractor=feature_extractor,
131
+ tokenizer=tokenizer,
132
+ )
133
+ assert self.processor.feature_extractor.sampling_rate == self.sampling_rate
134
+
135
+ self.model = Model.from_pretrained(
136
+ model_path,
137
+ config=self.config,
138
+ )
139
+ self.model.freeze_feature_extractor()
140
+ self.model.train()
141
+ self.model_initialized = True
142
+
143
+ def set_model_type(self, type):
144
+ self.model_type = type
145
+
146
+ def is_ann(self):
147
+ if self.model_type == "ann":
148
+ return True
149
+ else:
150
+ return False
151
+
152
+ def set_testdata(self, data_df, feats_df):
153
+ self.df_test, self.feats_test = data_df, feats_df
154
+
155
+ def reset_test(self, df_test, feats_test):
156
+ self.df_test, self.feats_test = df_test, feats_test
157
+
158
+ def set_id(self, run, epoch):
159
+ self.run = run
160
+ self.epoch = epoch
161
+ dir = self.util.get_path("model_dir")
162
+ name = f"{self.util.get_exp_name(only_train=True)}_{self.run}_{self.epoch:03d}.model"
163
+ self.store_path = dir + name
164
+
165
+ def data_collator(self, data):
166
+ files = [d["file"] for d in data]
167
+ starts = [d["start"] for d in data]
168
+ ends = [d["end"] for d in data]
169
+ targets = [d["targets"] for d in data]
170
+
171
+ signals = []
172
+ for file, start, end in zip(
173
+ files,
174
+ starts,
175
+ ends,
176
+ ):
177
+ offset = start
178
+ duration = end - offset
179
+ if self.max_duration_sec is not None:
180
+ duration = min(duration, self.max_duration_sec)
181
+ signal, _ = audiofile.read(
182
+ file,
183
+ offset=offset,
184
+ duration=duration,
185
+ )
186
+ signals.append(signal.squeeze())
187
+
188
+ input_values = self.processor(
189
+ signals,
190
+ sampling_rate=self.sampling_rate,
191
+ padding=True,
192
+ )
193
+ batch = self.processor.pad(
194
+ input_values,
195
+ padding=True,
196
+ return_tensors="pt",
197
+ )
198
+
199
+ batch["labels"] = torch.tensor(targets)
200
+
201
+ return batch
202
+
203
+ def compute_metrics(self, p: transformers.EvalPrediction):
204
+
205
+ metrics = {
206
+ "UAR": audmetric.unweighted_average_recall,
207
+ "ACC": audmetric.accuracy,
208
+ }
209
+
210
+ truth = p.label_ids[:, 0].astype(int)
211
+ preds = p.predictions
212
+ preds = np.argmax(preds, axis=1)
213
+ scores = {}
214
+ for name, metric in metrics.items():
215
+ scores[f"{name}"] = metric(truth, preds)
216
+ return scores
217
+
218
+ def train(self):
219
+ """Train the model"""
220
+
221
+ model_root = self.util.get_path("model_dir")
222
+ log_root = os.path.join(self.util.get_exp_dir(), "log")
223
+ audeer.mkdir(log_root)
224
+ self.torch_root = audeer.path(model_root, "torch")
225
+ conf_file = os.path.join(self.torch_root, "config.json")
226
+ if os.path.isfile(conf_file):
227
+ self.util.debug(f"reusing finetuned model: {conf_file}")
228
+ self.load(self.run, self.epoch)
229
+ return
230
+ targets = pd.DataFrame(self.dataset["train"]["targets"])
231
+ counts = targets[0].value_counts().sort_index()
232
+ train_weights = 1 / counts
233
+ train_weights /= train_weights.sum()
234
+ # print(train_weights)
235
+ criterion_gender = torch.nn.CrossEntropyLoss(
236
+ weight=torch.Tensor(train_weights).to("cuda"),
237
+ )
238
+
239
+ class Trainer(transformers.Trainer):
240
+
241
+ def compute_loss(
242
+ self,
243
+ model,
244
+ inputs,
245
+ return_outputs=False,
246
+ ):
247
+
248
+ targets = inputs.pop("labels").squeeze()
249
+ targets_gender = targets.type(torch.long)
250
+
251
+ outputs = model(**inputs)
252
+ logits_gender = outputs[0].squeeze()
253
+
254
+ loss_gender = criterion_gender(logits_gender, targets_gender)
255
+
256
+ loss = loss_gender
257
+
258
+ return (loss, outputs) if return_outputs else loss
259
+
260
+ num_steps = (
261
+ len(self.dataset["train"])
262
+ // (self.batch_size * self.accumulation_steps)
263
+ // 5
264
+ )
265
+ num_steps = max(1, num_steps)
266
+ # print(num_steps)
267
+
268
+ training_args = transformers.TrainingArguments(
269
+ output_dir=model_root,
270
+ logging_dir=log_root,
271
+ per_device_train_batch_size=self.batch_size,
272
+ per_device_eval_batch_size=self.batch_size,
273
+ gradient_accumulation_steps=self.accumulation_steps,
274
+ evaluation_strategy="steps",
275
+ num_train_epochs=self.epoch_num,
276
+ fp16=True,
277
+ save_steps=num_steps,
278
+ eval_steps=num_steps,
279
+ logging_steps=num_steps,
280
+ learning_rate=1e-4,
281
+ save_total_limit=2,
282
+ metric_for_best_model="UAR",
283
+ greater_is_better=True,
284
+ load_best_model_at_end=True,
285
+ remove_unused_columns=False,
286
+ report_to="none",
287
+ )
288
+
289
+ trainer = Trainer(
290
+ model=self.model,
291
+ data_collator=self.data_collator,
292
+ args=training_args,
293
+ compute_metrics=self.compute_metrics,
294
+ train_dataset=self.dataset["train"],
295
+ eval_dataset=self.dataset["dev"],
296
+ tokenizer=self.processor.feature_extractor,
297
+ callbacks=[transformers.integrations.TensorBoardCallback()],
298
+ )
299
+ trainer.train()
300
+ trainer.save_model(self.torch_root)
301
+ self.load(self.run, self.epoch)
302
+
303
+ def get_predictions(self):
304
+ results = []
305
+ for (file, start, end), _ in audeer.progress_bar(
306
+ self.df_test.iterrows(),
307
+ total=len(self.df_test),
308
+ desc=f"Predicting {len(self.df_test)} audiofiles",
309
+ ):
310
+ if end == pd.NaT:
311
+ signal, sr = audiofile.read(file, offset=start)
312
+ else:
313
+ signal, sr = audiofile.read(
314
+ file, duration=end - start, offset=start, always_2d=True
315
+ )
316
+ assert sr == self.sampling_rate
317
+ predictions = self.model.predict(signal)
318
+ results.append(predictions.argmax())
319
+ return results
320
+
321
+ def predict(self):
322
+ """Predict the whole eval feature set"""
323
+ predictions = self.get_predictions()
324
+ report = Reporter(
325
+ self.df_test[self.target].to_numpy().astype(float),
326
+ predictions,
327
+ self.run,
328
+ self.epoch,
329
+ )
330
+ return report
331
+
332
+ def predict_sample(self, signal):
333
+ """Predict one sample"""
334
+ prediction = {}
335
+ if self.util.exp_is_classification():
336
+ # get the class probabilities
337
+ predictions = self.model.predict(signal)
338
+ # pred = self.clf.predict(features)
339
+ for i in range(len(self.labels)):
340
+ cat = self.labels[i]
341
+ prediction[cat] = predictions[i]
342
+ else:
343
+ predictions = self.model.predict(signal)
344
+ prediction = predictions
345
+ return prediction
346
+
347
+ def store(self):
348
+ self.util.debug("stored: ")
349
+
350
+ def load(self, run, epoch):
351
+ self.set_id(run, epoch)
352
+ self.model = Model.from_pretrained(
353
+ self.torch_root,
354
+ config=self.config,
355
+ )
356
+ # print(f"loaded model type {type(self.model)}")
357
+
358
+ def load_path(self, path, run, epoch):
359
+ self.set_id(run, epoch)
360
+ with open(path, "rb") as handle:
361
+ self.clf = pickle.load(handle)
362
+
363
+
364
+ @dataclasses.dataclass
365
+ class ModelOutput(transformers.file_utils.ModelOutput):
366
+
367
+ logits_cat: torch.FloatTensor = None
368
+ hidden_states: typing.Tuple[torch.FloatTensor] = None
369
+ cnn_features: torch.FloatTensor = None
370
+
371
+
372
+ class ModelHead(torch.nn.Module):
373
+
374
+ def __init__(self, config, num_labels):
375
+
376
+ super().__init__()
377
+
378
+ self.dense = torch.nn.Linear(config.hidden_size, config.hidden_size)
379
+ self.dropout = torch.nn.Dropout(config.final_dropout)
380
+ self.out_proj = torch.nn.Linear(config.hidden_size, num_labels)
381
+
382
+ def forward(self, features, **kwargs):
383
+
384
+ x = features
385
+ x = self.dropout(x)
386
+ x = self.dense(x)
387
+ x = torch.tanh(x)
388
+ x = self.dropout(x)
389
+ x = self.out_proj(x)
390
+
391
+ return x
392
+
393
+
394
+ class Model(Wav2Vec2PreTrainedModel):
395
+
396
+ def __init__(self, config):
397
+
398
+ super().__init__(config)
399
+
400
+ self.wav2vec2 = Wav2Vec2Model(config)
401
+ self.cat = ModelHead(config, 2)
402
+ self.init_weights()
403
+
404
+ def freeze_feature_extractor(self):
405
+ self.wav2vec2.feature_extractor._freeze_parameters()
406
+
407
+ def pooling(
408
+ self,
409
+ hidden_states,
410
+ attention_mask,
411
+ ):
412
+
413
+ if attention_mask is None: # For evaluation with batch_size==1
414
+ outputs = torch.mean(hidden_states, dim=1)
415
+ else:
416
+ attention_mask = self._get_feature_vector_attention_mask(
417
+ hidden_states.shape[1],
418
+ attention_mask,
419
+ )
420
+ hidden_states = hidden_states * torch.reshape(
421
+ attention_mask,
422
+ (-1, attention_mask.shape[-1], 1),
423
+ )
424
+ outputs = torch.sum(hidden_states, dim=1)
425
+ attention_sum = torch.sum(attention_mask, dim=1)
426
+ outputs = outputs / torch.reshape(attention_sum, (-1, 1))
427
+
428
+ return outputs
429
+
430
+ def forward(
431
+ self,
432
+ input_values,
433
+ attention_mask=None,
434
+ labels=None,
435
+ return_hidden=False,
436
+ ):
437
+
438
+ outputs = self.wav2vec2(
439
+ input_values,
440
+ attention_mask=attention_mask,
441
+ )
442
+
443
+ cnn_features = outputs.extract_features
444
+ hidden_states_framewise = outputs.last_hidden_state
445
+ hidden_states = self.pooling(
446
+ hidden_states_framewise,
447
+ attention_mask,
448
+ )
449
+ logits_cat = self.cat(hidden_states)
450
+
451
+ if not self.training:
452
+ logits_cat = torch.softmax(logits_cat, dim=1)
453
+
454
+ if return_hidden:
455
+
456
+ # make time last axis
457
+ cnn_features = torch.transpose(cnn_features, 1, 2)
458
+
459
+ return ModelOutput(
460
+ logits_cat=logits_cat,
461
+ hidden_states=hidden_states,
462
+ cnn_features=cnn_features,
463
+ )
464
+
465
+ else:
466
+
467
+ return ModelOutput(
468
+ logits_cat=logits_cat,
469
+ )
470
+
471
+ def predict(self, signal):
472
+ result = self(torch.from_numpy(signal))
473
+ result = result[0].detach().numpy()[0]
474
+ return result
475
+
476
+
477
+ class ModelWithPreProcessing(Model):
478
+
479
+ def __init__(self, config):
480
+ super().__init__(config)
481
+
482
+ def forward(
483
+ self,
484
+ input_values,
485
+ ):
486
+ # Wav2Vec2FeatureExtractor.zero_mean_unit_var_norm():
487
+ # normed_slice = (vector - vector[:length].mean()) / np.sqrt(vector[:length].var() + 1e-7)
488
+
489
+ mean = input_values.mean()
490
+
491
+ # var = input_values.var()
492
+ # raises: onnxruntime.capi.onnxruntime_pybind11_state.NotImplemented: [ONNXRuntimeError] : 9 : NOT_IMPLEMENTED : Could not find an implementation for the node ReduceProd_3:ReduceProd(11)
493
+
494
+ var = torch.square(input_values - mean).mean()
495
+ input_values = (input_values - mean) / torch.sqrt(var + 1e-7)
496
+
497
+ output = super().forward(
498
+ input_values,
499
+ return_hidden=True,
500
+ )
501
+
502
+ return (
503
+ output.hidden_states,
504
+ output.logits_cat,
505
+ output.cnn_features,
506
+ )
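Pretrained_model.train() above balances the classes by weighting the cross-entropy loss with normalized inverse class frequencies. A small worked example of that computation, with made-up label counts:

    import pandas as pd
    import torch

    targets = pd.Series([0, 0, 0, 1])             # three samples of class 0, one of class 1
    counts = targets.value_counts().sort_index()  # class 0 -> 3, class 1 -> 1
    weights = 1 / counts                          # 0.333..., 1.0
    weights /= weights.sum()                      # 0.25, 0.75: the rarer class gets the higher weight
    criterion = torch.nn.CrossEntropyLoss(weight=torch.Tensor(weights.to_numpy()))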
nkululeko/test_pretrain.py CHANGED
@@ -53,8 +53,8 @@ def doit(config_file):
53
53
  expr.fill_train_and_tests()
54
54
  util.debug(f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
55
55
 
56
+ model_root = util.get_path("model_dir")
56
57
  log_root = audeer.mkdir("log")
57
- model_root = audeer.mkdir("model")
58
58
  torch_root = audeer.path(model_root, "torch")
59
59
 
60
60
  metrics_gender = {
@@ -69,7 +69,7 @@ def doit(config_file):
69
69
  num_layers = None
70
70
 
71
71
  os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
72
- os.environ["CUDA_VISIBLE_DEVICES"] = "3"
72
+ os.environ["CUDA_VISIBLE_DEVICES"] = "0"
73
73
 
74
74
  batch_size = 16
75
75
  accumulation_steps = 4
@@ -259,6 +259,7 @@ def doit(config_file):
259
259
  greater_is_better=True,
260
260
  load_best_model_at_end=True,
261
261
  remove_unused_columns=False,
262
+ report_to="none",
262
263
  )
263
264
 
264
265
  trainer = Trainer(
@@ -271,9 +272,20 @@ def doit(config_file):
271
272
  tokenizer=processor.feature_extractor,
272
273
  callbacks=[transformers.integrations.TensorBoardCallback()],
273
274
  )
275
+ if False:
276
+ trainer.train()
277
+ trainer.save_model(torch_root)
274
278
 
275
- trainer.train()
276
- trainer.save_model(torch_root)
279
+ modelnew = fm.Model.from_pretrained(
280
+ torch_root,
281
+ config=config,
282
+ )
283
+ print(f"loaded new model type {type(modelnew)}")
284
+ import audiofile
285
+
286
+ signal, _ = audiofile.read("./test.wav", always_2d=True)
287
+ result = modelnew.predict(signal)
288
+ print(result)
277
289
 
278
290
  print("DONE")
279
291
 
{nkululeko-0.84.1.dist-info → nkululeko-0.85.0.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.84.1
3
+ Version: 0.85.0
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -333,6 +333,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
333
333
  Changelog
334
334
  =========
335
335
 
336
+ Version 0.85.0
337
+ --------------
338
+ * first version with finetuning wav2vec2 layers
339
+
336
340
  Version 0.84.1
337
341
  --------------
338
342
  * made resample independent of config file
{nkululeko-0.84.1.dist-info → nkululeko-0.85.0.dist-info}/RECORD RENAMED
@@ -2,18 +2,18 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
3
3
  nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
4
4
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
5
- nkululeko/constants.py,sha256=31GQXyAN-nrfQCNIt6_aSkBVeE_J3GO-PklTEy6EgBg,39
5
+ nkululeko/constants.py,sha256=flWSUNQs4r0X0SgoR1I72Mk49cRUdpBN8Zng8sySFBE,39
6
6
  nkululeko/demo.py,sha256=8bl15Kitoesnz8oa8yrs52T6YCSOhWbbq9PnZ8Hj6D0,3232
7
7
  nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
8
8
  nkululeko/demo_predictor.py,sha256=es56xbT8ifkS_vnrlb5NTZT54gNmeUtNlA4zVA_gnN8,4757
9
- nkululeko/experiment.py,sha256=mYdHfInMkuOI3frkZo7oaEe9viO-Qa1ZShyF6MPozcU,30225
9
+ nkululeko/experiment.py,sha256=9Nw23b7sVOciH8IaOuAAKbY7otXYSsPrj_rQCA_U9cc,30465
10
10
  nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
11
11
  nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
12
12
  nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzwM,3976
13
13
  nkululeko/file_checker.py,sha256=LoLnL8aHpW-axMQ46qbqrManTs5otG9ShpEZuz9iRSk,3474
14
14
  nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
15
15
  nkululeko/glob_conf.py,sha256=KL9YJQTHvTztxo1vr25qRRgaPnx4NTg0XrdbovKGMmw,525
16
- nkululeko/modelrunner.py,sha256=GwDXcE2gDQXat4W0-HhHQ1BcUNCRBXMBQ4QycfHp_5c,9288
16
+ nkululeko/modelrunner.py,sha256=pPhvTh1rIrFQg5Ox9T1KoFJ4wRcLCmJl7LFud2DA41w,10464
17
17
  nkululeko/multidb.py,sha256=fG3VukEWP1vreVN4gB1IRXxwwg4jLftsSEYtu0o1f78,5634
18
18
  nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
19
19
  nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
@@ -26,7 +26,7 @@ nkululeko/segment.py,sha256=YLKckX44tbvTb3LrdgYw9X4guzuF27sutl92z9DkpZU,4835
26
26
  nkululeko/syllable_nuclei.py,sha256=Sky-C__MeUDaxqHnDl2TGLLYOYvsahD35TUjWGeG31k,10047
27
27
  nkululeko/test.py,sha256=1w624vo5KTzmFC8BUStGlLDmIEAFuJUz7J0W-gp7AxI,1677
28
28
  nkululeko/test_predictor.py,sha256=_w5J8CxH6hmW3mLTKbdfmywl5QpdNAnW1Y8TE5GtlfE,3237
29
- nkululeko/test_pretrain.py,sha256=4b_39l01dySei_e0ys2NKo9Gipf1Fukp1GvhQllFHt8,8131
29
+ nkululeko/test_pretrain.py,sha256=ZWl-bR6nmeSmXkGAIE6zyfQEjN8Zg0rIxfaS-O6Zbas,8465
30
30
  nkululeko/augmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
31
  nkululeko/augmenting/augmenter.py,sha256=XAt0dpmlnKxqyysqCgV3rcz-pRIvOz7rU7dmGDCVAzs,2905
32
32
  nkululeko/augmenting/randomsplicer.py,sha256=Z5rxdKKUpuncLWuTS6xVfVKUeVbeiYU_dLRHQ5fcg4Y,2669
@@ -68,15 +68,15 @@ nkululeko/feat_extract/feats_squim.py,sha256=Y31YmDmscuG0YozvxyBZIutO3id8t7IZJWC
68
68
  nkululeko/feat_extract/feats_trill.py,sha256=K2ahhdpwpjgg3WZS1POg3UMP2U44i8cLZZvn5Rq7fUI,3228
69
69
  nkululeko/feat_extract/feats_wav2vec2.py,sha256=9WUMfyddB_3nx79g7mZoQrRynhM1uEBWuOotRq8bxoU,5268
70
70
  nkululeko/feat_extract/feats_wavlm.py,sha256=ulxpGjifUFx2ZgGmY32SmBJGIuvkYHoLb2n1LZ8KMwA,4703
71
- nkululeko/feat_extract/feats_whisper.py,sha256=BFspQBI53HAgw22vBEeFskGwFZA-94Rpl17xM458HRo,4576
71
+ nkululeko/feat_extract/feats_whisper.py,sha256=0N7Vj65OVi2PNoB_NrDjWT5lP6xZNKxFOZZIoxkJvcA,4533
72
72
  nkululeko/feat_extract/featureset.py,sha256=HtgW2389rmlRAgFP3F1sSFzq2_iUVr2NhOfIXG9omt0,1448
73
73
  nkululeko/feat_extract/feinberg_praat.py,sha256=EP9pMALjlKdiYInLQdrZ7MmE499Mq-ISRCgqbqL3Rxc,21304
74
74
  nkululeko/losses/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
75
75
  nkululeko/losses/loss_ccc.py,sha256=NOK0y0fxKUnU161B5geap6Fmn8QzoPl2MqtPiV8IuJE,976
76
76
  nkululeko/losses/loss_softf1loss.py,sha256=5gW-PuiqeAZcRgfwjueIOQtMokOjZWgQnVIv59HKTCo,1309
77
77
  nkululeko/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
- nkululeko/models/finetune_model.py,sha256=bx9NsFpEqf_mBohcrf-9lWjrC4AtOIJ7holNXwaFo2Y,4910
79
- nkululeko/models/model.py,sha256=fL6LB6I9Oqo_OWUIptqiu6abuxVYYv8bW2a3m4XSLqU,11601
78
+ nkululeko/models/finetune_model.py,sha256=OMlzDyUFNXZ2xSiqqH8tbzey_KzPJ4jsoYT-4KrWFKM,5091
79
+ nkululeko/models/model.py,sha256=PUCqF2r_dEfmFsZn6Cgr1UIzYvxziLH6nSqZ5-vuN1o,11639
80
80
  nkululeko/models/model_bayes.py,sha256=WJFZ8wFKwWATz6MhmjeZIi1Pal1viU549WL_PjXDSy8,406
81
81
  nkululeko/models/model_cnn.py,sha256=bJxqwe6FnVR2hFeqN6EXexYGgvKYFED1VOhBXVlLWaE,9954
82
82
  nkululeko/models/model_gmm.py,sha256=hZ9UO36KNf48qa3J-xkWIicIj9-TApmt21zNES2vEOs,649
@@ -89,6 +89,7 @@ nkululeko/models/model_svm.py,sha256=rsME3KvKvNG7bdE5lbvYUu85WZhaASZxxmdNDIVJRZ4
89
89
  nkululeko/models/model_svr.py,sha256=_YZeksqB3eBENGlg3g9RwYFlk9rQQ-XCeNBKLlGGVoE,725
90
90
  nkululeko/models/model_tree.py,sha256=rf16faUm4o2LJgkoYpeY998b8DQIvXZ73_m1IS3TnnE,417
91
91
  nkululeko/models/model_tree_reg.py,sha256=IgQcPTE-304HQLYSKPF8Z4ot_Ur9dH01fZjS0nXke_M,428
92
+ nkululeko/models/model_tuned.py,sha256=zmagIE3QHP67_XJCx5r7ZXBojsp6SC8IS-L3XRWmCEk,15650
92
93
  nkululeko/models/model_xgb.py,sha256=Thgx5ESdIok4v72mKh4plxpo4smGcKALWNCJTDScY0M,447
93
94
  nkululeko/models/model_xgr.py,sha256=aGBtNGLWjOE_2rICGYGFxmT8DtnHYsIl1lIpMtghHsY,418
94
95
  nkululeko/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -105,8 +106,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
106
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
106
107
  nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
107
108
  nkululeko/utils/util.py,sha256=b1IHFucRNuF9Iyv5IJeK4AEg0Rga0xKG80UM5GWWdHA,13816
108
- nkululeko-0.84.1.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
109
- nkululeko-0.84.1.dist-info/METADATA,sha256=Y647w-vkRjPG7fssLTEF_Aa_pP74aN-WPCGv6r0_NcE,36420
110
- nkululeko-0.84.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
111
- nkululeko-0.84.1.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
112
- nkululeko-0.84.1.dist-info/RECORD,,
109
+ nkululeko-0.85.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
110
+ nkululeko-0.85.0.dist-info/METADATA,sha256=Zt3H0FmIXOJvzyLOI0aC8VfvjrdIkd4uNvb937luo_k,36499
111
+ nkululeko-0.85.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
112
+ nkululeko-0.85.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
113
+ nkululeko-0.85.0.dist-info/RECORD,,