nkululeko 0.90.0__py3-none-any.whl → 0.90.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. nkululeko/aug_train.py +6 -4
  2. nkululeko/augment.py +6 -4
  3. nkululeko/augmenting/augmenter.py +4 -4
  4. nkululeko/augmenting/randomsplicer.py +6 -6
  5. nkululeko/augmenting/randomsplicing.py +2 -3
  6. nkululeko/augmenting/resampler.py +9 -6
  7. nkululeko/autopredict/ap_age.py +4 -2
  8. nkululeko/autopredict/ap_arousal.py +4 -2
  9. nkululeko/autopredict/ap_dominance.py +3 -2
  10. nkululeko/autopredict/ap_gender.py +4 -2
  11. nkululeko/autopredict/ap_mos.py +5 -2
  12. nkululeko/autopredict/ap_pesq.py +5 -2
  13. nkululeko/autopredict/ap_sdr.py +5 -2
  14. nkululeko/autopredict/ap_snr.py +5 -2
  15. nkululeko/autopredict/ap_stoi.py +5 -2
  16. nkululeko/autopredict/ap_valence.py +4 -2
  17. nkululeko/autopredict/estimate_snr.py +10 -14
  18. nkululeko/cacheddataset.py +1 -1
  19. nkululeko/constants.py +1 -1
  20. nkululeko/data/dataset.py +11 -14
  21. nkululeko/data/dataset_csv.py +5 -3
  22. nkululeko/demo-ft.py +29 -0
  23. nkululeko/demo_feats.py +5 -4
  24. nkululeko/demo_predictor.py +3 -4
  25. nkululeko/ensemble.py +27 -28
  26. nkululeko/experiment.py +3 -5
  27. nkululeko/experiment_felix.py +728 -0
  28. nkululeko/explore.py +1 -0
  29. nkululeko/export.py +7 -5
  30. nkululeko/feat_extract/feats_agender.py +5 -4
  31. nkululeko/feat_extract/feats_agender_agender.py +7 -6
  32. nkululeko/feat_extract/feats_analyser.py +18 -16
  33. nkululeko/feat_extract/feats_ast.py +9 -8
  34. nkululeko/feat_extract/feats_auddim.py +3 -5
  35. nkululeko/feat_extract/feats_audmodel.py +2 -2
  36. nkululeko/feat_extract/feats_clap.py +9 -12
  37. nkululeko/feat_extract/feats_hubert.py +2 -3
  38. nkululeko/feat_extract/feats_import.py +5 -4
  39. nkululeko/feat_extract/feats_mld.py +3 -5
  40. nkululeko/feat_extract/feats_mos.py +4 -3
  41. nkululeko/feat_extract/feats_opensmile.py +4 -3
  42. nkululeko/feat_extract/feats_oxbow.py +5 -4
  43. nkululeko/feat_extract/feats_praat.py +4 -7
  44. nkululeko/feat_extract/feats_snr.py +3 -5
  45. nkululeko/feat_extract/feats_spectra.py +8 -9
  46. nkululeko/feat_extract/feats_spkrec.py +6 -11
  47. nkululeko/feat_extract/feats_squim.py +2 -4
  48. nkululeko/feat_extract/feats_trill.py +2 -5
  49. nkululeko/feat_extract/feats_wav2vec2.py +8 -4
  50. nkululeko/feat_extract/feats_wavlm.py +2 -3
  51. nkululeko/feat_extract/feats_whisper.py +4 -6
  52. nkululeko/feat_extract/featureset.py +4 -2
  53. nkululeko/feat_extract/feinberg_praat.py +1 -3
  54. nkululeko/feat_extract/transformer_feature_extractor.py +147 -0
  55. nkululeko/file_checker.py +3 -3
  56. nkululeko/filter_data.py +3 -1
  57. nkululeko/fixedsegment.py +83 -0
  58. nkululeko/models/model.py +3 -5
  59. nkululeko/models/model_bayes.py +1 -0
  60. nkululeko/models/model_cnn.py +4 -6
  61. nkululeko/models/model_gmm.py +13 -9
  62. nkululeko/models/model_knn.py +1 -0
  63. nkululeko/models/model_knn_reg.py +1 -0
  64. nkululeko/models/model_lin_reg.py +1 -0
  65. nkululeko/models/model_mlp.py +2 -3
  66. nkululeko/models/model_mlp_regression.py +1 -6
  67. nkululeko/models/model_svm.py +2 -2
  68. nkululeko/models/model_svr.py +1 -0
  69. nkululeko/models/model_tree.py +2 -3
  70. nkululeko/models/model_tree_reg.py +1 -0
  71. nkululeko/models/model_tuned.py +88 -41
  72. nkululeko/models/model_xgb.py +1 -0
  73. nkululeko/models/model_xgr.py +1 -0
  74. nkululeko/multidb.py +1 -0
  75. nkululeko/nkululeko.py +1 -1
  76. nkululeko/predict.py +4 -5
  77. nkululeko/reporting/defines.py +6 -8
  78. nkululeko/reporting/latex_writer.py +3 -3
  79. nkululeko/reporting/report.py +2 -2
  80. nkululeko/reporting/report_item.py +1 -0
  81. nkululeko/reporting/reporter.py +20 -19
  82. nkululeko/resample.py +8 -12
  83. nkululeko/resample_cli.py +99 -0
  84. nkululeko/runmanager.py +3 -1
  85. nkululeko/scaler.py +1 -1
  86. nkululeko/segment.py +6 -5
  87. nkululeko/segmenting/seg_inaspeechsegmenter.py +3 -3
  88. nkululeko/segmenting/seg_silero.py +4 -4
  89. nkululeko/syllable_nuclei.py +9 -22
  90. nkululeko/test_pretrain.py +6 -7
  91. nkululeko/utils/stats.py +0 -1
  92. nkululeko/utils/util.py +4 -5
  93. {nkululeko-0.90.0.dist-info → nkululeko-0.90.2.dist-info}/METADATA +11 -2
  94. nkululeko-0.90.2.dist-info/RECORD +119 -0
  95. {nkululeko-0.90.0.dist-info → nkululeko-0.90.2.dist-info}/WHEEL +1 -1
  96. nkululeko-0.90.0.dist-info/RECORD +0 -114
  97. {nkululeko-0.90.0.dist-info → nkululeko-0.90.2.dist-info}/LICENSE +0 -0
  98. {nkululeko-0.90.0.dist-info → nkululeko-0.90.2.dist-info}/top_level.txt +0 -0
nkululeko/aug_train.py CHANGED
@@ -1,15 +1,17 @@
1
1
  # aug_train.py
2
2
  # train with augmentations
3
+ import argparse
3
4
  import ast
5
+ import configparser
4
6
  import os.path
7
+
5
8
  import numpy as np
6
- import configparser
7
- import argparse
9
+
8
10
  import nkululeko.experiment as exp
9
- from nkululeko.utils.util import Util
10
- from nkululeko.constants import VERSION
11
11
  import nkululeko.glob_conf as glob_conf
12
12
  from nkululeko.augment import doit as augment
13
+ from nkululeko.constants import VERSION
14
+ from nkululeko.utils.util import Util
13
15
 
14
16
 
15
17
  def doit(config_file):
nkululeko/augment.py CHANGED
@@ -2,13 +2,15 @@
2
2
  # augment the training sets
3
3
 
4
4
  import argparse
5
- import pandas as pd
6
- import os
7
5
  import ast
8
- from nkululeko.experiment import Experiment
9
6
  import configparser
10
- from nkululeko.utils.util import Util
7
+ import os
8
+
9
+ import pandas as pd
10
+
11
11
  from nkululeko.constants import VERSION
12
+ from nkululeko.experiment import Experiment
13
+ from nkululeko.utils.util import Util
12
14
 
13
15
 
14
16
  def doit(config_file):
@@ -1,12 +1,12 @@
1
1
  # augmenter.py
2
2
  import os
3
- import numpy as np
3
+
4
+ import audeer
5
+ import audiofile
4
6
  import pandas as pd
5
7
  from audiomentations import *
6
8
  from tqdm import tqdm
7
- import audeer
8
- import audiofile
9
- from audformat.utils import map_file_path
9
+
10
10
  from nkululeko.utils.util import Util
11
11
 
12
12
 
@@ -13,15 +13,15 @@ F. Burkhardt, Anna Derington, Matthias Kahlau, Klaus Scherer, Florian Eyben and
13
13
 
14
14
  """
15
15
 
16
- import pandas as pd
17
- from tqdm import tqdm
18
16
  import os
19
- import numpy as np
20
- import audiofile as af
21
- from audformat.utils import map_file_path
17
+
22
18
  import audeer
23
- from nkululeko.utils.util import Util
19
+ import audiofile as af
20
+ import pandas as pd
21
+ from tqdm import tqdm
22
+
24
23
  import nkululeko.augmenting.randomsplicing as rsp
24
+ from nkululeko.utils.util import Util
25
25
 
26
26
 
27
27
  class Randomsplicer:
@@ -10,9 +10,9 @@ Evaluated in:
10
10
  F. Burkhardt, Anna Derington, Matthias Kahlau, Klaus Scherer, Florian Eyben and Björn Schuller: Masking Speech Contents by Random Splicing: is Emotional Expression Preserved?, Proc. ICASSP, 2023
11
11
 
12
12
  """
13
- import numpy as np
13
+
14
14
  import librosa
15
- import audiofile as af
15
+ import numpy as np
16
16
 
17
17
 
18
18
  def random_splicing(
@@ -54,7 +54,6 @@ def split_wav_naive(wav, top_db=12):
54
54
 
55
55
 
56
56
  def remix_random_reverse(wav, indices, p_reverse=0):
57
- import random
58
57
 
59
58
  wav_remix = []
60
59
 
@@ -2,12 +2,14 @@
2
2
  resample a data frame
3
3
 
4
4
  """
5
+
5
6
  import os
6
7
  import shutil
7
8
 
8
9
  import audformat
9
10
  import pandas as pd
10
11
  import torchaudio
12
+
11
13
  from nkululeko.utils.util import Util
12
14
 
13
15
 
@@ -18,8 +20,11 @@ class Resampler:
18
20
  self.util = Util("resampler", has_config=not_testing)
19
21
  self.util.warn(f"all files might be resampled to {self.SAMPLING_RATE}")
20
22
  self.not_testing = not_testing
21
- self.replace = eval(self.util.config_val(
22
- "RESAMPLE", "replace", "False")) if not not_testing else replace
23
+ self.replace = (
24
+ eval(self.util.config_val("RESAMPLE", "replace", "False"))
25
+ if not not_testing
26
+ else replace
27
+ )
23
28
 
24
29
  def resample(self):
25
30
  files = self.df.index.get_level_values(0).values
@@ -45,8 +50,7 @@ class Resampler:
45
50
  continue
46
51
  if org_sr != self.SAMPLING_RATE:
47
52
  self.util.debug(f"resampling {f} (sr = {org_sr})")
48
- resampler = torchaudio.transforms.Resample(
49
- org_sr, self.SAMPLING_RATE)
53
+ resampler = torchaudio.transforms.Resample(org_sr, self.SAMPLING_RATE)
50
54
  signal = resampler(signal)
51
55
  if replace:
52
56
  torchaudio.save(
@@ -63,8 +67,7 @@ class Resampler:
63
67
  self.df = self.df.set_index(
64
68
  self.df.index.set_levels(new_files, level="file")
65
69
  )
66
- target_file = self.util.config_val(
67
- "RESAMPLE", "target", "resampled.csv")
70
+ target_file = self.util.config_val("RESAMPLE", "target", "resampled.csv")
68
71
  # remove encoded labels
69
72
  target = self.util.config_val("DATA", "target", "emotion")
70
73
  if "class_label" in self.df.columns:
@@ -2,10 +2,12 @@
2
2
  A predictor for age.
3
3
  Currently based on audEERING's agender model.
4
4
  """
5
- from nkululeko.utils.util import Util
6
- from nkululeko.feature_extractor import FeatureExtractor
5
+
7
6
  import ast
7
+
8
8
  import nkululeko.glob_conf as glob_conf
9
+ from nkululeko.feature_extractor import FeatureExtractor
10
+ from nkululeko.utils.util import Util
9
11
 
10
12
 
11
13
  class AgePredictor:
@@ -2,10 +2,12 @@
2
2
  A predictor for emotional arousal.
3
3
  Currently based on audEERING's emotional dimension model.
4
4
  """
5
- from nkululeko.utils.util import Util
6
- from nkululeko.feature_extractor import FeatureExtractor
5
+
7
6
  import ast
7
+
8
8
  import nkululeko.glob_conf as glob_conf
9
+ from nkululeko.feature_extractor import FeatureExtractor
10
+ from nkululeko.utils.util import Util
9
11
 
10
12
 
11
13
  class ArousalPredictor:
@@ -3,10 +3,11 @@ A predictor for emotional dominance.
3
3
  Currently based on audEERING's emotional dimension model.
4
4
  """
5
5
 
6
- from nkululeko.utils.util import Util
7
- from nkululeko.feature_extractor import FeatureExtractor
8
6
  import ast
7
+
9
8
  import nkululeko.glob_conf as glob_conf
9
+ from nkululeko.feature_extractor import FeatureExtractor
10
+ from nkululeko.utils.util import Util
10
11
 
11
12
 
12
13
  class DominancePredictor:
@@ -2,10 +2,12 @@
2
2
  A predictor for biological sex.
3
3
  Currently based on audEERING's agender model.
4
4
  """
5
- from nkululeko.utils.util import Util
6
- from nkululeko.feature_extractor import FeatureExtractor
5
+
7
6
  import ast
7
+
8
8
  import nkululeko.glob_conf as glob_conf
9
+ from nkululeko.feature_extractor import FeatureExtractor
10
+ from nkululeko.utils.util import Util
9
11
 
10
12
 
11
13
  class GenderPredictor:
@@ -1,11 +1,14 @@
1
1
  """"
2
2
  A predictor for MOS - mean opinion score.
3
3
  """
4
- from nkululeko.utils.util import Util
4
+
5
5
  import ast
6
+
7
+ import numpy as np
8
+
6
9
  import nkululeko.glob_conf as glob_conf
7
10
  from nkululeko.feature_extractor import FeatureExtractor
8
- import numpy as np
11
+ from nkululeko.utils.util import Util
9
12
 
10
13
 
11
14
  class MOSPredictor:
@@ -1,11 +1,14 @@
1
1
  """"
2
2
  A predictor for PESQ - Perceptual Evaluation of Speech Quality.
3
3
  """
4
- from nkululeko.utils.util import Util
4
+
5
5
  import ast
6
+
7
+ import numpy as np
8
+
6
9
  import nkululeko.glob_conf as glob_conf
7
10
  from nkululeko.feature_extractor import FeatureExtractor
8
- import numpy as np
11
+ from nkululeko.utils.util import Util
9
12
 
10
13
 
11
14
  class PESQPredictor:
@@ -2,11 +2,14 @@
2
2
  A predictor for SDR - Signal to Distortion Ratio.
3
3
  as estimated by Scale-Invariant Signal-to-Distortion Ratio (SI-SDR)
4
4
  """
5
- from nkululeko.utils.util import Util
5
+
6
6
  import ast
7
+
8
+ import numpy as np
9
+
7
10
  import nkululeko.glob_conf as glob_conf
8
11
  from nkululeko.feature_extractor import FeatureExtractor
9
- import numpy as np
12
+ from nkululeko.utils.util import Util
10
13
 
11
14
 
12
15
  class SDRPredictor:
@@ -1,11 +1,14 @@
1
1
  """"
2
2
  A predictor for SNR - signal-to-noise ratio.
3
3
  """
4
- from nkululeko.utils.util import Util
4
+
5
5
  import ast
6
+
7
+ import numpy as np
8
+
6
9
  import nkululeko.glob_conf as glob_conf
7
10
  from nkululeko.feature_extractor import FeatureExtractor
8
- import numpy as np
11
+ from nkululeko.utils.util import Util
9
12
 
10
13
 
11
14
  class SNRPredictor:
@@ -1,11 +1,14 @@
1
1
  """"
2
2
  A predictor for STOI - Short-Time Objective Intelligibility (STOI)
3
3
  """
4
- from nkululeko.utils.util import Util
4
+
5
5
  import ast
6
+
7
+ import numpy as np
8
+
6
9
  import nkululeko.glob_conf as glob_conf
7
10
  from nkululeko.feature_extractor import FeatureExtractor
8
- import numpy as np
11
+ from nkululeko.utils.util import Util
9
12
 
10
13
 
11
14
  class STOIPredictor:
@@ -2,10 +2,12 @@
2
2
  A predictor for emotional valence.
3
3
  Currently based on audEERING's emotional dimension model.
4
4
  """
5
- from nkululeko.utils.util import Util
6
- from nkululeko.feature_extractor import FeatureExtractor
5
+
7
6
  import ast
7
+
8
8
  import nkululeko.glob_conf as glob_conf
9
+ from nkululeko.feature_extractor import FeatureExtractor
10
+ from nkululeko.utils.util import Util
9
11
 
10
12
 
11
13
  class ValencePredictor:
@@ -43,9 +43,7 @@ class SNREstimator:
43
43
  def frame_audio(self, signal):
44
44
  num_frames = 1 + (len(signal) - self.frame_length) // self.hop_length
45
45
  frames = [
46
- signal[
47
- i * self.hop_length: (i * self.hop_length) + self.frame_length
48
- ]
46
+ signal[i * self.hop_length : (i * self.hop_length) + self.frame_length]
49
47
  for i in range(num_frames)
50
48
  ]
51
49
  return frames
@@ -64,11 +62,8 @@ class SNREstimator:
64
62
  for frame in frames
65
63
  ]
66
64
 
67
- energy_threshold_low = np.percentile(
68
- log_energies, 25) # First quartile
69
- energy_threshold_high = np.percentile(
70
- log_energies, 75
71
- ) # Third quartile
65
+ energy_threshold_low = np.percentile(log_energies, 25) # First quartile
66
+ energy_threshold_high = np.percentile(log_energies, 75) # Third quartile
72
67
 
73
68
  low_energy_frames = [
74
69
  log_energy
@@ -94,9 +89,7 @@ class SNREstimator:
94
89
  energy_threshold_high,
95
90
  )
96
91
 
97
- def plot_energy(
98
- self, log_energies, energy_threshold_low, energy_threshold_high
99
- ):
92
+ def plot_energy(self, log_energies, energy_threshold_low, energy_threshold_high):
100
93
  plt.figure(figsize=(10, 6))
101
94
  plt.plot(log_energies, label="Log Energy")
102
95
  plt.axhline(
@@ -153,9 +146,12 @@ def main():
153
146
 
154
147
  signal, sr = audiofile.read(args.input)
155
148
  snr_estimator = SNREstimator(signal, sr, args.window_size, args.hop_size)
156
- estimated_snr, log_energies, energy_threshold_low, energy_threshold_high = (
157
- snr_estimator.estimate_snr()
158
- )
149
+ (
150
+ estimated_snr,
151
+ log_energies,
152
+ energy_threshold_low,
153
+ energy_threshold_high,
154
+ ) = snr_estimator.estimate_snr()
159
155
 
160
156
  print("Estimated SNR:", estimated_snr)
161
157
 
@@ -1,5 +1,5 @@
1
- import torch
2
1
  import numpy as np
2
+ import torch
3
3
 
4
4
 
5
5
  class CachedDataset(torch.utils.data.Dataset):
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.90.0"
1
+ VERSION = "0.90.2"
2
2
  SAMPLING_RATE = 16000
nkululeko/data/dataset.py CHANGED
@@ -3,13 +3,11 @@ import ast
3
3
  import os
4
4
  import os.path
5
5
  from random import sample
6
- import numpy as np
7
- import pandas as pd
8
6
 
9
7
  import audformat
10
- from audformat.utils import duration
8
+ import numpy as np
9
+ import pandas as pd
11
10
 
12
- import nkululeko.filter_data as filter
13
11
  import nkululeko.glob_conf as glob_conf
14
12
  from nkululeko.filter_data import DataFilter
15
13
  from nkululeko.plots import Plots
@@ -274,20 +272,20 @@ class Dataset:
274
272
  # try to get the target values
275
273
  df_local[self.target] = source_df[self.col_label]
276
274
  is_labeled = True
277
- except (KeyError, ValueError, audformat.errors.BadKeyError) as e:
275
+ except (KeyError, ValueError, audformat.errors.BadKeyError):
278
276
  pass
279
277
  try:
280
278
  # try to get the speaker values
281
279
  df_local["speaker"] = source_df["speaker"]
282
280
  got_speaker = True
283
- except (KeyError, ValueError, audformat.errors.BadKeyError) as e:
281
+ except (KeyError, ValueError, audformat.errors.BadKeyError):
284
282
  pass
285
283
  try:
286
284
  # try to get the gender values
287
285
  if "gender" in source_df:
288
286
  df_local["gender"] = source_df["gender"]
289
287
  got_gender = True
290
- except (KeyError, ValueError, audformat.errors.BadKeyError) as e:
288
+ except (KeyError, ValueError, audformat.errors.BadKeyError):
291
289
  pass
292
290
  try:
293
291
  # try to get the age values
@@ -311,13 +309,13 @@ class Dataset:
311
309
  # also it might be possible that the age is part of the speaker description
312
310
  df_local["age"] = db[table]["speaker"].get(map="age").astype(int)
313
311
  got_age = True
314
- except (ValueError, audformat.errors.BadKeyError) as e:
312
+ except (ValueError, audformat.errors.BadKeyError):
315
313
  pass
316
314
  try:
317
315
  # same for the target, e.g. "age"
318
316
  df_local[self.target] = db[table]["speaker"].get(map=self.target)
319
317
  is_labeled = True
320
- except (ValueError, audformat.core.errors.BadKeyError) as e:
318
+ except (ValueError, audformat.core.errors.BadKeyError):
321
319
  pass
322
320
  # copy other column
323
321
  for column in source_df.columns:
@@ -463,8 +461,7 @@ class Dataset:
463
461
 
464
462
  def balanced_split(self):
465
463
  """One way to split train and eval sets: Generate split dataframes for some balancing criterion"""
466
- from splitutils import binning
467
- from splitutils import optimize_traintest_split
464
+ from splitutils import binning, optimize_traintest_split
468
465
 
469
466
  seed = 42
470
467
  k = 30
@@ -473,7 +470,7 @@ class Dataset:
473
470
  # split target
474
471
  targets = df[self.target].to_numpy()
475
472
  #
476
- bins = self.util.config_val("DATA", f"bin", False)
473
+ bins = self.util.config_val("DATA", "bin", False)
477
474
  if bins:
478
475
  nbins = len(ast.literal_eval(bins))
479
476
  targets = binning(targets, nbins=nbins)
@@ -481,7 +478,7 @@ class Dataset:
481
478
  speakers = df["speaker"].to_numpy()
482
479
 
483
480
  # on which variables (targets, groupings) to stratify
484
- stratif_vars = self.util.config_val("DATA", f"balance", False)
481
+ stratif_vars = self.util.config_val("DATA", "balance", False)
485
482
  stratif_vars_array = {}
486
483
  if not stratif_vars:
487
484
  self.util.error("balanced split needs stratif_vars to stratify the splits")
@@ -500,7 +497,7 @@ class Dataset:
500
497
  # weights for all stratify_on variables and
501
498
  # and for test proportion match. Give target
502
499
  # variable EMOTION more weight than groupings.
503
- size_diff = int(self.util.config_val("DATA", f"size_diff_weight", "1"))
500
+ size_diff = int(self.util.config_val("DATA", "size_diff_weight", "1"))
504
501
  weights = {
505
502
  "size_diff": size_diff,
506
503
  }
@@ -2,10 +2,12 @@
2
2
  import ast
3
3
  import os
4
4
  import os.path
5
- import pandas as pd
5
+
6
6
  import audformat.utils
7
- from nkululeko.data.dataset import Dataset
7
+ import pandas as pd
8
+
8
9
  import nkululeko.glob_conf as glob_conf
10
+ from nkululeko.data.dataset import Dataset
9
11
  from nkululeko.reporting.report_item import ReportItem
10
12
 
11
13
 
@@ -81,7 +83,7 @@ class Dataset_CSV(Dataset):
81
83
  self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
82
84
  is_index = False
83
85
  try:
84
- if self.is_labeled and not "class_label" in self.df.columns:
86
+ if self.is_labeled and "class_label" not in self.df.columns:
85
87
  self.df["class_label"] = self.df[self.target]
86
88
  except AttributeError:
87
89
  is_index = True
nkululeko/demo-ft.py ADDED
@@ -0,0 +1,29 @@
1
+ import argparse
2
+ import configparser
3
+ import os
4
+
5
+ from sklearn import pipeline
6
+ from transformers import pipelines
7
+
8
+ from nkululeko.utils.util import get_exp_dir
9
+
10
+ parser = argparse.ArgumentParser()
11
+ parser.add_argument("--file", help="A file that should be processed (16kHz mono wav)")
12
+ # read config from ini file
13
+ parser.add_argument("--config", default="exp.ini", help="The base configuration")
14
+
15
+
16
+ args = parser.parse_args()
17
+ file = args.file
18
+ config = configparser.ConfigParser()
19
+
20
+ # get exp dir from config [EXP][root][name] + models + run_0 + torch
21
+ config.read(args.config)
22
+ exp_dir = get_exp_dir("model_path")
23
+
24
+ # exp_dir = get_exp_dir("model_path")
25
+ model_path = os.path.join(exp_dir, "model")
26
+ pipe = pipelines("audio-classification", model=model_path)
27
+
28
+
29
+ print(pipeline(file))
nkululeko/demo_feats.py CHANGED
@@ -1,13 +1,14 @@
1
1
  # demo_feats.py
2
2
  # Test some features extracted
3
3
 
4
- from nkululeko.experiment import Experiment
5
- import configparser
6
- from nkululeko.utils.util import Util
7
- from nkululeko.constants import VERSION
8
4
  import argparse
5
+ import configparser
9
6
  import os
10
7
 
8
+ from nkululeko.constants import VERSION
9
+ from nkululeko.experiment import Experiment
10
+ from nkululeko.utils.util import Util
11
+
11
12
 
12
13
  def main(src_dir):
13
14
  parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
@@ -1,11 +1,10 @@
1
1
  # demo_predictor.py
2
2
  import os
3
3
 
4
- import numpy as np
5
- import pandas as pd
6
-
7
4
  import audformat
8
5
  import audiofile
6
+ import numpy as np
7
+ import pandas as pd
9
8
 
10
9
  import nkululeko.glob_conf as glob_conf
11
10
  from nkululeko.utils.util import Util
@@ -46,7 +45,7 @@ class Demo_predictor:
46
45
  )
47
46
  else:
48
47
  file_list = in_df.index.values
49
- except (ValueError, AttributeError) as error:
48
+ except (ValueError, AttributeError):
50
49
  with open(self.file) as f:
51
50
  first = True
52
51
  for index, line in enumerate(f):
nkululeko/ensemble.py CHANGED
@@ -15,28 +15,20 @@ Raises:
15
15
  ValueError: If an unknown ensemble method is provided.
16
16
  AssertionError: If the number of config files is less than 2 for majority voting.
17
17
  """
18
+
18
19
  #!/usr/bin/env python
19
20
  # -*- coding: utf-8 -*-
20
21
 
21
22
 
22
- from typing import List
23
23
  import configparser
24
24
  import time
25
25
  from argparse import ArgumentParser
26
26
  from pathlib import Path
27
+ from typing import List
27
28
 
28
29
  import numpy as np
29
30
  import pandas as pd
30
- import matplotlib.pyplot as plt
31
-
32
- from sklearn.metrics import(
33
- RocCurveDisplay,
34
- balanced_accuracy_score,
35
- classification_report,
36
- auc,
37
- roc_auc_score,
38
- roc_curve
39
- )
31
+ from sklearn.metrics import balanced_accuracy_score, classification_report
40
32
 
41
33
  from nkululeko.constants import VERSION
42
34
  from nkululeko.experiment import Experiment
@@ -169,17 +161,19 @@ def performance_weighted_ensemble(ensemble_preds_ls, labels, weights):
169
161
 
170
162
  # assert weights are decimals in the range 0-1
171
163
  assert all(0 <= w <= 1 for w in weights), "Weights must be between 0 and 1"
172
-
164
+
173
165
  # assert length of weights matches number of models
174
- assert len(weights) == len(ensemble_preds_ls), "Number of weights must match number of models"
175
-
166
+ assert len(weights) == len(
167
+ ensemble_preds_ls
168
+ ), "Number of weights must match number of models"
169
+
176
170
  # Normalize weights
177
171
  total_weight = sum(weights)
178
172
  weights = [weight / total_weight for weight in weights]
179
-
173
+
180
174
  for idx in ensemble_preds_ls[0].index:
181
175
  class_probabilities = {label: 0 for label in labels}
182
-
176
+
183
177
  for df, weight in zip(ensemble_preds_ls, weights):
184
178
  row = df.loc[idx]
185
179
  for label in labels:
@@ -192,10 +186,12 @@ def performance_weighted_ensemble(ensemble_preds_ls, labels, weights):
192
186
  return final_predictions, final_confidences
193
187
 
194
188
 
195
-
196
-
197
189
  def ensemble_predictions(
198
- config_files: List[str], method: str, threshold: float, weights: List[float], no_labels: bool
190
+ config_files: List[str],
191
+ method: str,
192
+ threshold: float,
193
+ weights: List[float],
194
+ no_labels: bool,
199
195
  ) -> pd.DataFrame:
200
196
  """
201
197
  Ensemble predictions from multiple experiments.
@@ -261,17 +257,20 @@ def ensemble_predictions(
261
257
  ensemble_preds_ls, labels, threshold
262
258
  )
263
259
  elif method == "uncertainty_weighted":
264
- ensemble_preds["predicted"], ensemble_preds["uncertainty"] = (
265
- uncertainty_weighted_ensemble(ensemble_preds_ls, labels)
266
- )
260
+ (
261
+ ensemble_preds["predicted"],
262
+ ensemble_preds["uncertainty"],
263
+ ) = uncertainty_weighted_ensemble(ensemble_preds_ls, labels)
267
264
  elif method == "confidence_weighted":
268
- ensemble_preds["predicted"], ensemble_preds["confidence"] = (
269
- confidence_weighted_ensemble(ensemble_preds_ls, labels)
270
- )
265
+ (
266
+ ensemble_preds["predicted"],
267
+ ensemble_preds["confidence"],
268
+ ) = confidence_weighted_ensemble(ensemble_preds_ls, labels)
271
269
  elif method == "performance_weighted":
272
- ensemble_preds["predicted"], ensemble_preds["confidence"] = (
273
- performance_weighted_ensemble(ensemble_preds_ls, labels, weights)
274
- )
270
+ (
271
+ ensemble_preds["predicted"],
272
+ ensemble_preds["confidence"],
273
+ ) = performance_weighted_ensemble(ensemble_preds_ls, labels, weights)
275
274
  else:
276
275
  raise ValueError(f"Unknown ensemble method: {method}")
277
276