nkululeko 0.89.2__py3-none-any.whl → 0.90.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. nkululeko/aug_train.py +6 -4
  2. nkululeko/augment.py +8 -6
  3. nkululeko/augmenting/augmenter.py +4 -4
  4. nkululeko/augmenting/randomsplicer.py +12 -9
  5. nkululeko/augmenting/randomsplicing.py +2 -3
  6. nkululeko/augmenting/resampler.py +9 -6
  7. nkululeko/autopredict/ap_age.py +4 -2
  8. nkululeko/autopredict/ap_arousal.py +4 -2
  9. nkululeko/autopredict/ap_dominance.py +3 -2
  10. nkululeko/autopredict/ap_gender.py +4 -2
  11. nkululeko/autopredict/ap_mos.py +5 -2
  12. nkululeko/autopredict/ap_pesq.py +5 -2
  13. nkululeko/autopredict/ap_sdr.py +5 -2
  14. nkululeko/autopredict/ap_snr.py +5 -2
  15. nkululeko/autopredict/ap_stoi.py +5 -2
  16. nkululeko/autopredict/ap_valence.py +4 -2
  17. nkululeko/autopredict/estimate_snr.py +10 -14
  18. nkululeko/cacheddataset.py +1 -1
  19. nkululeko/constants.py +1 -1
  20. nkululeko/data/dataset.py +19 -16
  21. nkululeko/data/dataset_csv.py +5 -3
  22. nkululeko/demo-ft.py +29 -0
  23. nkululeko/demo_feats.py +5 -4
  24. nkululeko/demo_predictor.py +3 -4
  25. nkululeko/ensemble.py +27 -28
  26. nkululeko/experiment.py +11 -7
  27. nkululeko/experiment_felix.py +728 -0
  28. nkululeko/explore.py +1 -0
  29. nkululeko/export.py +7 -5
  30. nkululeko/feat_extract/feats_agender.py +5 -4
  31. nkululeko/feat_extract/feats_agender_agender.py +7 -6
  32. nkululeko/feat_extract/feats_analyser.py +18 -16
  33. nkululeko/feat_extract/feats_ast.py +9 -8
  34. nkululeko/feat_extract/feats_auddim.py +3 -5
  35. nkululeko/feat_extract/feats_audmodel.py +2 -2
  36. nkululeko/feat_extract/feats_clap.py +9 -12
  37. nkululeko/feat_extract/feats_hubert.py +2 -3
  38. nkululeko/feat_extract/feats_import.py +5 -4
  39. nkululeko/feat_extract/feats_mld.py +3 -5
  40. nkululeko/feat_extract/feats_mos.py +4 -3
  41. nkululeko/feat_extract/feats_opensmile.py +4 -3
  42. nkululeko/feat_extract/feats_oxbow.py +5 -4
  43. nkululeko/feat_extract/feats_praat.py +4 -7
  44. nkululeko/feat_extract/feats_snr.py +3 -5
  45. nkululeko/feat_extract/feats_spectra.py +8 -9
  46. nkululeko/feat_extract/feats_spkrec.py +6 -11
  47. nkululeko/feat_extract/feats_squim.py +2 -4
  48. nkululeko/feat_extract/feats_trill.py +2 -5
  49. nkululeko/feat_extract/feats_wav2vec2.py +8 -4
  50. nkululeko/feat_extract/feats_wavlm.py +2 -3
  51. nkululeko/feat_extract/feats_whisper.py +4 -6
  52. nkululeko/feat_extract/featureset.py +4 -2
  53. nkululeko/feat_extract/feinberg_praat.py +1 -3
  54. nkululeko/feat_extract/transformer_feature_extractor.py +147 -0
  55. nkululeko/file_checker.py +3 -3
  56. nkululeko/filter_data.py +3 -1
  57. nkululeko/fixedsegment.py +83 -0
  58. nkululeko/models/model.py +3 -5
  59. nkululeko/models/model_bayes.py +1 -0
  60. nkululeko/models/model_cnn.py +4 -6
  61. nkululeko/models/model_gmm.py +13 -9
  62. nkululeko/models/model_knn.py +1 -0
  63. nkululeko/models/model_knn_reg.py +1 -0
  64. nkululeko/models/model_lin_reg.py +1 -0
  65. nkululeko/models/model_mlp.py +2 -3
  66. nkululeko/models/model_mlp_regression.py +1 -6
  67. nkululeko/models/model_svm.py +2 -2
  68. nkululeko/models/model_svr.py +1 -0
  69. nkululeko/models/model_tree.py +2 -3
  70. nkululeko/models/model_tree_reg.py +1 -0
  71. nkululeko/models/model_tuned.py +54 -33
  72. nkululeko/models/model_xgb.py +1 -0
  73. nkululeko/models/model_xgr.py +1 -0
  74. nkululeko/multidb.py +1 -0
  75. nkululeko/nkululeko.py +1 -1
  76. nkululeko/plots.py +1 -1
  77. nkululeko/predict.py +4 -5
  78. nkululeko/reporting/defines.py +6 -8
  79. nkululeko/reporting/latex_writer.py +3 -3
  80. nkululeko/reporting/report.py +2 -2
  81. nkululeko/reporting/report_item.py +1 -0
  82. nkululeko/reporting/reporter.py +20 -19
  83. nkululeko/resample.py +8 -12
  84. nkululeko/resample_cli.py +99 -0
  85. nkululeko/runmanager.py +3 -1
  86. nkululeko/scaler.py +1 -1
  87. nkululeko/segment.py +6 -5
  88. nkululeko/segmenting/seg_inaspeechsegmenter.py +3 -3
  89. nkululeko/segmenting/seg_silero.py +4 -4
  90. nkululeko/syllable_nuclei.py +9 -22
  91. nkululeko/test_pretrain.py +6 -7
  92. nkululeko/utils/stats.py +0 -1
  93. nkululeko/utils/util.py +2 -3
  94. {nkululeko-0.89.2.dist-info → nkululeko-0.90.1.dist-info}/METADATA +12 -2
  95. nkululeko-0.90.1.dist-info/RECORD +119 -0
  96. {nkululeko-0.89.2.dist-info → nkululeko-0.90.1.dist-info}/WHEEL +1 -1
  97. nkululeko-0.89.2.dist-info/RECORD +0 -114
  98. {nkululeko-0.89.2.dist-info → nkululeko-0.90.1.dist-info}/LICENSE +0 -0
  99. {nkululeko-0.89.2.dist-info → nkululeko-0.90.1.dist-info}/top_level.txt +0 -0
nkululeko/aug_train.py CHANGED
@@ -1,15 +1,17 @@
1
1
  # aug_train.py
2
2
  # train with augmentations
3
+ import argparse
3
4
  import ast
5
+ import configparser
4
6
  import os.path
7
+
5
8
  import numpy as np
6
- import configparser
7
- import argparse
9
+
8
10
  import nkululeko.experiment as exp
9
- from nkululeko.utils.util import Util
10
- from nkululeko.constants import VERSION
11
11
  import nkululeko.glob_conf as glob_conf
12
12
  from nkululeko.augment import doit as augment
13
+ from nkululeko.constants import VERSION
14
+ from nkululeko.utils.util import Util
13
15
 
14
16
 
15
17
  def doit(config_file):
nkululeko/augment.py CHANGED
@@ -2,13 +2,15 @@
2
2
  # augment the training sets
3
3
 
4
4
  import argparse
5
- import pandas as pd
6
- import os
7
5
  import ast
8
- from nkululeko.experiment import Experiment
9
6
  import configparser
10
- from nkululeko.utils.util import Util
7
+ import os
8
+
9
+ import pandas as pd
10
+
11
11
  from nkululeko.constants import VERSION
12
+ from nkululeko.experiment import Experiment
13
+ from nkululeko.utils.util import Util
12
14
 
13
15
 
14
16
  def doit(config_file):
@@ -37,8 +39,8 @@ def doit(config_file):
37
39
 
38
40
  filename = util.config_val("AUGMENT", "result", "augmented.csv")
39
41
  filename = f"{expr.data_dir}/{filename}"
40
-
41
- if os.path.exists(filename):
42
+ no_reuse = eval(util.config_val("DATA", "no_reuse", "False"))
43
+ if os.path.exists(filename) and not no_reuse:
42
44
  util.debug("files already augmented")
43
45
  else:
44
46
  # load the data
nkululeko/augmenting/augmenter.py CHANGED
@@ -1,12 +1,12 @@
1
1
  # augmenter.py
2
2
  import os
3
- import numpy as np
3
+
4
+ import audeer
5
+ import audiofile
4
6
  import pandas as pd
5
7
  from audiomentations import *
6
8
  from tqdm import tqdm
7
- import audeer
8
- import audiofile
9
- from audformat.utils import map_file_path
9
+
10
10
  from nkululeko.utils.util import Util
11
11
 
12
12
 
nkululeko/augmenting/randomsplicer.py CHANGED
@@ -13,15 +13,15 @@ F. Burkhardt, Anna Derington, Matthias Kahlau, Klaus Scherer, Florian Eyben and
13
13
 
14
14
  """
15
15
 
16
- import pandas as pd
17
- from tqdm import tqdm
18
16
  import os
19
- import numpy as np
20
- import audiofile as af
21
- from audformat.utils import map_file_path
17
+
22
18
  import audeer
23
- from nkululeko.utils.util import Util
19
+ import audiofile as af
20
+ import pandas as pd
21
+ from tqdm import tqdm
22
+
24
23
  import nkululeko.augmenting.randomsplicing as rsp
24
+ from nkululeko.utils.util import Util
25
25
 
26
26
 
27
27
  class Randomsplicer:
@@ -41,14 +41,17 @@ class Randomsplicer:
41
41
  * top_db: top db level for silence to be recognized (default: 12)
42
42
  """
43
43
 
44
- p_reverse = 0.3
45
- top_db = 12
44
+ p_reverse = float(self.util.config_val("AUGMENT", "p_reverse", "0.3"))
45
+ top_db = float(self.util.config_val("AUGMENT", "top_db", "12"))
46
46
 
47
47
  files = self.df.index.get_level_values(0).values
48
48
  store = self.util.get_path("store")
49
49
  filepath = f"{store}randomspliced/"
50
50
  audeer.mkdir(filepath)
51
- self.util.debug(f"random splicing {sample_selection} samples to {filepath}")
51
+ self.util.debug(
52
+ f"random splicing {sample_selection} samples to {filepath}, "
53
+ + f"p_reverse = {p_reverse}, top_db = {top_db}",
54
+ )
52
55
  newpath = ""
53
56
  index_map = {}
54
57
  for i, f in enumerate(tqdm(files)):
nkululeko/augmenting/randomsplicing.py CHANGED
@@ -10,9 +10,9 @@ Evaluated in:
10
10
  F. Burkhardt, Anna Derington, Matthias Kahlau, Klaus Scherer, Florian Eyben and Björn Schuller: Masking Speech Contents by Random Splicing: is Emotional Expression Preserved?, Proc. ICASSP, 2023
11
11
 
12
12
  """
13
- import numpy as np
13
+
14
14
  import librosa
15
- import audiofile as af
15
+ import numpy as np
16
16
 
17
17
 
18
18
  def random_splicing(
@@ -54,7 +54,6 @@ def split_wav_naive(wav, top_db=12):
54
54
 
55
55
 
56
56
  def remix_random_reverse(wav, indices, p_reverse=0):
57
- import random
58
57
 
59
58
  wav_remix = []
60
59
 
nkululeko/augmenting/resampler.py CHANGED
@@ -2,12 +2,14 @@
2
2
  resample a data frame
3
3
 
4
4
  """
5
+
5
6
  import os
6
7
  import shutil
7
8
 
8
9
  import audformat
9
10
  import pandas as pd
10
11
  import torchaudio
12
+
11
13
  from nkululeko.utils.util import Util
12
14
 
13
15
 
@@ -18,8 +20,11 @@ class Resampler:
18
20
  self.util = Util("resampler", has_config=not_testing)
19
21
  self.util.warn(f"all files might be resampled to {self.SAMPLING_RATE}")
20
22
  self.not_testing = not_testing
21
- self.replace = eval(self.util.config_val(
22
- "RESAMPLE", "replace", "False")) if not not_testing else replace
23
+ self.replace = (
24
+ eval(self.util.config_val("RESAMPLE", "replace", "False"))
25
+ if not not_testing
26
+ else replace
27
+ )
23
28
 
24
29
  def resample(self):
25
30
  files = self.df.index.get_level_values(0).values
@@ -45,8 +50,7 @@ class Resampler:
45
50
  continue
46
51
  if org_sr != self.SAMPLING_RATE:
47
52
  self.util.debug(f"resampling {f} (sr = {org_sr})")
48
- resampler = torchaudio.transforms.Resample(
49
- org_sr, self.SAMPLING_RATE)
53
+ resampler = torchaudio.transforms.Resample(org_sr, self.SAMPLING_RATE)
50
54
  signal = resampler(signal)
51
55
  if replace:
52
56
  torchaudio.save(
@@ -63,8 +67,7 @@ class Resampler:
63
67
  self.df = self.df.set_index(
64
68
  self.df.index.set_levels(new_files, level="file")
65
69
  )
66
- target_file = self.util.config_val(
67
- "RESAMPLE", "target", "resampled.csv")
70
+ target_file = self.util.config_val("RESAMPLE", "target", "resampled.csv")
68
71
  # remove encoded labels
69
72
  target = self.util.config_val("DATA", "target", "emotion")
70
73
  if "class_label" in self.df.columns:
nkululeko/autopredict/ap_age.py CHANGED
@@ -2,10 +2,12 @@
2
2
  A predictor for age.
3
3
  Currently based on audEERING's agender model.
4
4
  """
5
- from nkululeko.utils.util import Util
6
- from nkululeko.feature_extractor import FeatureExtractor
5
+
7
6
  import ast
7
+
8
8
  import nkululeko.glob_conf as glob_conf
9
+ from nkululeko.feature_extractor import FeatureExtractor
10
+ from nkululeko.utils.util import Util
9
11
 
10
12
 
11
13
  class AgePredictor:
nkululeko/autopredict/ap_arousal.py CHANGED
@@ -2,10 +2,12 @@
2
2
  A predictor for emotional arousal.
3
3
  Currently based on audEERING's emotional dimension model.
4
4
  """
5
- from nkululeko.utils.util import Util
6
- from nkululeko.feature_extractor import FeatureExtractor
5
+
7
6
  import ast
7
+
8
8
  import nkululeko.glob_conf as glob_conf
9
+ from nkululeko.feature_extractor import FeatureExtractor
10
+ from nkululeko.utils.util import Util
9
11
 
10
12
 
11
13
  class ArousalPredictor:
nkululeko/autopredict/ap_dominance.py CHANGED
@@ -3,10 +3,11 @@ A predictor for emotional dominance.
3
3
  Currently based on audEERING's emotional dimension model.
4
4
  """
5
5
 
6
- from nkululeko.utils.util import Util
7
- from nkululeko.feature_extractor import FeatureExtractor
8
6
  import ast
7
+
9
8
  import nkululeko.glob_conf as glob_conf
9
+ from nkululeko.feature_extractor import FeatureExtractor
10
+ from nkululeko.utils.util import Util
10
11
 
11
12
 
12
13
  class DominancePredictor:
nkululeko/autopredict/ap_gender.py CHANGED
@@ -2,10 +2,12 @@
2
2
  A predictor for biological sex.
3
3
  Currently based on audEERING's agender model.
4
4
  """
5
- from nkululeko.utils.util import Util
6
- from nkululeko.feature_extractor import FeatureExtractor
5
+
7
6
  import ast
7
+
8
8
  import nkululeko.glob_conf as glob_conf
9
+ from nkululeko.feature_extractor import FeatureExtractor
10
+ from nkululeko.utils.util import Util
9
11
 
10
12
 
11
13
  class GenderPredictor:
nkululeko/autopredict/ap_mos.py CHANGED
@@ -1,11 +1,14 @@
1
1
  """"
2
2
  A predictor for MOS - mean opinion score.
3
3
  """
4
- from nkululeko.utils.util import Util
4
+
5
5
  import ast
6
+
7
+ import numpy as np
8
+
6
9
  import nkululeko.glob_conf as glob_conf
7
10
  from nkululeko.feature_extractor import FeatureExtractor
8
- import numpy as np
11
+ from nkululeko.utils.util import Util
9
12
 
10
13
 
11
14
  class MOSPredictor:
nkululeko/autopredict/ap_pesq.py CHANGED
@@ -1,11 +1,14 @@
1
1
  """"
2
2
  A predictor for PESQ - Perceptual Evaluation of Speech Quality.
3
3
  """
4
- from nkululeko.utils.util import Util
4
+
5
5
  import ast
6
+
7
+ import numpy as np
8
+
6
9
  import nkululeko.glob_conf as glob_conf
7
10
  from nkululeko.feature_extractor import FeatureExtractor
8
- import numpy as np
11
+ from nkululeko.utils.util import Util
9
12
 
10
13
 
11
14
  class PESQPredictor:
nkululeko/autopredict/ap_sdr.py CHANGED
@@ -2,11 +2,14 @@
2
2
  A predictor for SDR - Signal to Distortion Ratio.
3
3
  as estimated by Scale-Invariant Signal-to-Distortion Ratio (SI-SDR)
4
4
  """
5
- from nkululeko.utils.util import Util
5
+
6
6
  import ast
7
+
8
+ import numpy as np
9
+
7
10
  import nkululeko.glob_conf as glob_conf
8
11
  from nkululeko.feature_extractor import FeatureExtractor
9
- import numpy as np
12
+ from nkululeko.utils.util import Util
10
13
 
11
14
 
12
15
  class SDRPredictor:
nkululeko/autopredict/ap_snr.py CHANGED
@@ -1,11 +1,14 @@
1
1
  """"
2
2
  A predictor for SNR - signal-to-noise ratio.
3
3
  """
4
- from nkululeko.utils.util import Util
4
+
5
5
  import ast
6
+
7
+ import numpy as np
8
+
6
9
  import nkululeko.glob_conf as glob_conf
7
10
  from nkululeko.feature_extractor import FeatureExtractor
8
- import numpy as np
11
+ from nkululeko.utils.util import Util
9
12
 
10
13
 
11
14
  class SNRPredictor:
nkululeko/autopredict/ap_stoi.py CHANGED
@@ -1,11 +1,14 @@
1
1
  """"
2
2
  A predictor for STOI - Short-Time Objective Intelligibility (STOI)
3
3
  """
4
- from nkululeko.utils.util import Util
4
+
5
5
  import ast
6
+
7
+ import numpy as np
8
+
6
9
  import nkululeko.glob_conf as glob_conf
7
10
  from nkululeko.feature_extractor import FeatureExtractor
8
- import numpy as np
11
+ from nkululeko.utils.util import Util
9
12
 
10
13
 
11
14
  class STOIPredictor:
nkululeko/autopredict/ap_valence.py CHANGED
@@ -2,10 +2,12 @@
2
2
  A predictor for emotional valence.
3
3
  Currently based on audEERING's emotional dimension model.
4
4
  """
5
- from nkululeko.utils.util import Util
6
- from nkululeko.feature_extractor import FeatureExtractor
5
+
7
6
  import ast
7
+
8
8
  import nkululeko.glob_conf as glob_conf
9
+ from nkululeko.feature_extractor import FeatureExtractor
10
+ from nkululeko.utils.util import Util
9
11
 
10
12
 
11
13
  class ValencePredictor:
nkululeko/autopredict/estimate_snr.py CHANGED
@@ -43,9 +43,7 @@ class SNREstimator:
43
43
  def frame_audio(self, signal):
44
44
  num_frames = 1 + (len(signal) - self.frame_length) // self.hop_length
45
45
  frames = [
46
- signal[
47
- i * self.hop_length: (i * self.hop_length) + self.frame_length
48
- ]
46
+ signal[i * self.hop_length : (i * self.hop_length) + self.frame_length]
49
47
  for i in range(num_frames)
50
48
  ]
51
49
  return frames
@@ -64,11 +62,8 @@ class SNREstimator:
64
62
  for frame in frames
65
63
  ]
66
64
 
67
- energy_threshold_low = np.percentile(
68
- log_energies, 25) # First quartile
69
- energy_threshold_high = np.percentile(
70
- log_energies, 75
71
- ) # Third quartile
65
+ energy_threshold_low = np.percentile(log_energies, 25) # First quartile
66
+ energy_threshold_high = np.percentile(log_energies, 75) # Third quartile
72
67
 
73
68
  low_energy_frames = [
74
69
  log_energy
@@ -94,9 +89,7 @@ class SNREstimator:
94
89
  energy_threshold_high,
95
90
  )
96
91
 
97
- def plot_energy(
98
- self, log_energies, energy_threshold_low, energy_threshold_high
99
- ):
92
+ def plot_energy(self, log_energies, energy_threshold_low, energy_threshold_high):
100
93
  plt.figure(figsize=(10, 6))
101
94
  plt.plot(log_energies, label="Log Energy")
102
95
  plt.axhline(
@@ -153,9 +146,12 @@ def main():
153
146
 
154
147
  signal, sr = audiofile.read(args.input)
155
148
  snr_estimator = SNREstimator(signal, sr, args.window_size, args.hop_size)
156
- estimated_snr, log_energies, energy_threshold_low, energy_threshold_high = (
157
- snr_estimator.estimate_snr()
158
- )
149
+ (
150
+ estimated_snr,
151
+ log_energies,
152
+ energy_threshold_low,
153
+ energy_threshold_high,
154
+ ) = snr_estimator.estimate_snr()
159
155
 
160
156
  print("Estimated SNR:", estimated_snr)
161
157
 
nkululeko/cacheddataset.py CHANGED
@@ -1,5 +1,5 @@
1
- import torch
2
1
  import numpy as np
2
+ import torch
3
3
 
4
4
 
5
5
  class CachedDataset(torch.utils.data.Dataset):
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.89.2"
1
+ VERSION = "0.90.1"
2
2
  SAMPLING_RATE = 16000
nkululeko/data/dataset.py CHANGED
@@ -3,13 +3,11 @@ import ast
3
3
  import os
4
4
  import os.path
5
5
  from random import sample
6
- import numpy as np
7
- import pandas as pd
8
6
 
9
7
  import audformat
10
- from audformat.utils import duration
8
+ import numpy as np
9
+ import pandas as pd
11
10
 
12
- import nkululeko.filter_data as filter
13
11
  import nkululeko.glob_conf as glob_conf
14
12
  from nkululeko.filter_data import DataFilter
15
13
  from nkululeko.plots import Plots
@@ -30,8 +28,8 @@ class Dataset:
30
28
  def __init__(self, name):
31
29
  """Constructor setting up name and configuration"""
32
30
  self.name = name
33
- self.target = glob_conf.config["DATA"]["target"]
34
31
  self.util = Util("dataset")
32
+ self.target = self.util.config_val("DATA", "target", "none")
35
33
  self.plot = Plots()
36
34
  self.limit = int(self.util.config_val_data(self.name, "limit", 0))
37
35
  self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
@@ -127,6 +125,9 @@ class Dataset:
127
125
  self.got_gender,
128
126
  self.got_age,
129
127
  ) = self._get_df_for_lists(self.db, df_files_tables)
128
+ if df.shape[0] > 0 and self.target == "none":
129
+ self.df = df
130
+ return
130
131
  if False in {
131
132
  self.is_labeled,
132
133
  self.got_speaker,
@@ -271,20 +272,20 @@ class Dataset:
271
272
  # try to get the target values
272
273
  df_local[self.target] = source_df[self.col_label]
273
274
  is_labeled = True
274
- except (KeyError, ValueError, audformat.errors.BadKeyError) as e:
275
+ except (KeyError, ValueError, audformat.errors.BadKeyError):
275
276
  pass
276
277
  try:
277
278
  # try to get the speaker values
278
279
  df_local["speaker"] = source_df["speaker"]
279
280
  got_speaker = True
280
- except (KeyError, ValueError, audformat.errors.BadKeyError) as e:
281
+ except (KeyError, ValueError, audformat.errors.BadKeyError):
281
282
  pass
282
283
  try:
283
284
  # try to get the gender values
284
285
  if "gender" in source_df:
285
286
  df_local["gender"] = source_df["gender"]
286
287
  got_gender = True
287
- except (KeyError, ValueError, audformat.errors.BadKeyError) as e:
288
+ except (KeyError, ValueError, audformat.errors.BadKeyError):
288
289
  pass
289
290
  try:
290
291
  # try to get the age values
@@ -308,13 +309,13 @@ class Dataset:
308
309
  # also it might be possible that the age is part of the speaker description
309
310
  df_local["age"] = db[table]["speaker"].get(map="age").astype(int)
310
311
  got_age = True
311
- except (ValueError, audformat.errors.BadKeyError) as e:
312
+ except (ValueError, audformat.errors.BadKeyError):
312
313
  pass
313
314
  try:
314
315
  # same for the target, e.g. "age"
315
316
  df_local[self.target] = db[table]["speaker"].get(map=self.target)
316
317
  is_labeled = True
317
- except (ValueError, audformat.core.errors.BadKeyError) as e:
318
+ except (ValueError, audformat.core.errors.BadKeyError):
318
319
  pass
319
320
  # copy other column
320
321
  for column in source_df.columns:
@@ -460,8 +461,7 @@ class Dataset:
460
461
 
461
462
  def balanced_split(self):
462
463
  """One way to split train and eval sets: Generate split dataframes for some balancing criterion"""
463
- from splitutils import binning
464
- from splitutils import optimize_traintest_split
464
+ from splitutils import binning, optimize_traintest_split
465
465
 
466
466
  seed = 42
467
467
  k = 30
@@ -470,7 +470,7 @@ class Dataset:
470
470
  # split target
471
471
  targets = df[self.target].to_numpy()
472
472
  #
473
- bins = self.util.config_val("DATA", f"bin", False)
473
+ bins = self.util.config_val("DATA", "bin", False)
474
474
  if bins:
475
475
  nbins = len(ast.literal_eval(bins))
476
476
  targets = binning(targets, nbins=nbins)
@@ -478,7 +478,7 @@ class Dataset:
478
478
  speakers = df["speaker"].to_numpy()
479
479
 
480
480
  # on which variables (targets, groupings) to stratify
481
- stratif_vars = self.util.config_val("DATA", f"balance", False)
481
+ stratif_vars = self.util.config_val("DATA", "balance", False)
482
482
  stratif_vars_array = {}
483
483
  if not stratif_vars:
484
484
  self.util.error("balanced split needs stratif_vars to stratify the splits")
@@ -497,7 +497,7 @@ class Dataset:
497
497
  # weights for all stratify_on variables and
498
498
  # and for test proportion match. Give target
499
499
  # variable EMOTION more weight than groupings.
500
- size_diff = int(self.util.config_val("DATA", f"size_diff_weight", "1"))
500
+ size_diff = int(self.util.config_val("DATA", "size_diff_weight", "1"))
501
501
  weights = {
502
502
  "size_diff": size_diff,
503
503
  }
@@ -553,7 +553,10 @@ class Dataset:
553
553
  " samples in train/test"
554
554
  )
555
555
  # because this generates new train/test sample quantaties, the feature extraction has to be done again
556
- glob_conf.config["FEATS"]["needs_feature_extraction"] = "True"
556
+ try:
557
+ glob_conf.config["FEATS"]["needs_feature_extraction"] = "True"
558
+ except KeyError:
559
+ pass
557
560
 
558
561
  def random_split(self):
559
562
  """One way to split train and eval sets: Specify percentage of random samples"""
nkululeko/data/dataset_csv.py CHANGED
@@ -2,10 +2,12 @@
2
2
  import ast
3
3
  import os
4
4
  import os.path
5
- import pandas as pd
5
+
6
6
  import audformat.utils
7
- from nkululeko.data.dataset import Dataset
7
+ import pandas as pd
8
+
8
9
  import nkululeko.glob_conf as glob_conf
10
+ from nkululeko.data.dataset import Dataset
9
11
  from nkululeko.reporting.report_item import ReportItem
10
12
 
11
13
 
@@ -81,7 +83,7 @@ class Dataset_CSV(Dataset):
81
83
  self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
82
84
  is_index = False
83
85
  try:
84
- if self.is_labeled and not "class_label" in self.df.columns:
86
+ if self.is_labeled and "class_label" not in self.df.columns:
85
87
  self.df["class_label"] = self.df[self.target]
86
88
  except AttributeError:
87
89
  is_index = True
nkululeko/demo-ft.py ADDED
@@ -0,0 +1,29 @@
1
+ import argparse
2
+ import configparser
3
+ import os
4
+
5
+ from sklearn import pipeline
6
+ from transformers import pipelines
7
+
8
+ from nkululeko.utils.util import get_exp_dir
9
+
10
+ parser = argparse.ArgumentParser()
11
+ parser.add_argument("--file", help="A file that should be processed (16kHz mono wav)")
12
+ # read config from ini file
13
+ parser.add_argument("--config", default="exp.ini", help="The base configuration")
14
+
15
+
16
+ args = parser.parse_args()
17
+ file = args.file
18
+ config = configparser.ConfigParser()
19
+
20
+ # get exp dir from config [EXP][root][name] + models + run_0 + torch
21
+ config.read(args.config)
22
+ exp_dir = get_exp_dir("model_path")
23
+
24
+ # exp_dir = get_exp_dir("model_path")
25
+ model_path = os.path.join(exp_dir, "model")
26
+ pipe = pipelines("audio-classification", model=model_path)
27
+
28
+
29
+ print(pipeline(file))
nkululeko/demo_feats.py CHANGED
@@ -1,13 +1,14 @@
1
1
  # demo_feats.py
2
2
  # Test some features extracted
3
3
 
4
- from nkululeko.experiment import Experiment
5
- import configparser
6
- from nkululeko.utils.util import Util
7
- from nkululeko.constants import VERSION
8
4
  import argparse
5
+ import configparser
9
6
  import os
10
7
 
8
+ from nkululeko.constants import VERSION
9
+ from nkululeko.experiment import Experiment
10
+ from nkululeko.utils.util import Util
11
+
11
12
 
12
13
  def main(src_dir):
13
14
  parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
nkululeko/demo_predictor.py CHANGED
@@ -1,11 +1,10 @@
1
1
  # demo_predictor.py
2
2
  import os
3
3
 
4
- import numpy as np
5
- import pandas as pd
6
-
7
4
  import audformat
8
5
  import audiofile
6
+ import numpy as np
7
+ import pandas as pd
9
8
 
10
9
  import nkululeko.glob_conf as glob_conf
11
10
  from nkululeko.utils.util import Util
@@ -46,7 +45,7 @@ class Demo_predictor:
46
45
  )
47
46
  else:
48
47
  file_list = in_df.index.values
49
- except (ValueError, AttributeError) as error:
48
+ except (ValueError, AttributeError):
50
49
  with open(self.file) as f:
51
50
  first = True
52
51
  for index, line in enumerate(f):