nkululeko-0.81.3-py3-none-any.whl → nkululeko-0.81.6-py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, exactly as they were released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,22 +1,25 @@
1
- """
2
- feature_extractor.py
3
-
4
- Helper class to encapsulate feature extraction methods
1
+ """Extract acoustic features from audio samples.
5
2
 
3
+ Extract acoustic features using several feature extractors
4
+ (appends the features column-wise)
6
5
  """
6
+
7
7
  import pandas as pd
8
8
 
9
9
  from nkululeko.utils.util import Util
10
10
 
11
11
 
12
12
  class FeatureExtractor:
13
- """
14
- Extract acoustic features from audio samples, using several feature extractors (appends the features column-wise)
13
+ """Extract acoustic features from audio samples.
14
+
15
+ Extract acoustic features using several feature extractors (appends the features column-wise).
16
+
15
17
  Args:
16
18
  data_df (pandas.DataFrame): dataframe with audiofile paths as index
17
- feats_types (array of strings): designations of acoustic feature extractors to be used
18
- data_name (string): names of databases that are extracted (for the caching)
19
- feats_designation (string): the type of split (train/test), also is used for the cache name.
19
+ feats_types (List[str]): designations of acoustic feature extractors to be used
20
+ data_name (str): name of databases that are extracted (for caching)
21
+ feats_designation (str): the type of split (train/test), also is used for the cache name.
22
+
20
23
  Returns:
21
24
  df (pandas.DataFrame): dataframe with same index as data_df and acoustic features in columns
22
25
  """
@@ -25,7 +28,6 @@ class FeatureExtractor:
25
28
  df = None
26
29
  data_df = None # dataframe to get audio paths
27
30
 
28
- # def __init__
29
31
  def __init__(self, data_df, feats_types, data_name, feats_designation):
30
32
  self.data_df = data_df
31
33
  self.data_name = data_name
@@ -34,147 +36,69 @@ class FeatureExtractor:
34
36
  self.feats_designation = feats_designation
35
37
 
36
38
  def extract(self):
37
- # feats_types = self.util.config_val_list('FEATS', 'type', ['os'])
38
- self.featExtractor = None
39
39
  self.feats = pd.DataFrame()
40
- _scale = True
41
40
  for feats_type in self.feats_types:
42
41
  store_name = f"{self.data_name}_{feats_type}"
43
- if feats_type == "os":
44
- from nkululeko.feat_extract.feats_opensmile import Opensmileset
45
-
46
- self.featExtractor = Opensmileset(
47
- f"{store_name}_{self.feats_designation}", self.data_df
48
- )
49
- elif feats_type == "spectra":
50
- from nkululeko.feat_extract.feats_spectra import Spectraloader
51
-
52
- self.featExtractor = Spectraloader(
53
- f"{store_name}_{self.feats_designation}", self.data_df
54
- )
55
- elif feats_type == "trill":
56
- from nkululeko.feat_extract.feats_trill import TRILLset
57
-
58
- self.featExtractor = TRILLset(
59
- f"{store_name}_{self.feats_designation}", self.data_df
60
- )
61
- elif feats_type.startswith("wav2vec"):
62
- from nkululeko.feat_extract.feats_wav2vec2 import Wav2vec2
63
-
64
- self.featExtractor = Wav2vec2(
65
- f"{store_name}_{self.feats_designation}",
66
- self.data_df,
67
- feats_type,
68
- )
69
- elif feats_type.startswith("hubert"):
70
- from nkululeko.feat_extract.feats_hubert import Hubert
71
-
72
- self.featExtractor = Hubert(
73
- f"{store_name}_{self.feats_designation}",
74
- self.data_df,
75
- feats_type,
76
- )
77
-
78
- elif feats_type.startswith("wavlm"):
79
- from nkululeko.feat_extract.feats_wavlm import Wavlm
80
-
81
- self.featExtractor = Wavlm(
82
- f"{store_name}_{self.feats_designation}",
83
- self.data_df,
84
- feats_type,
85
- )
86
-
87
- elif feats_type.startswith("spkrec"):
88
- from nkululeko.feat_extract.feats_spkrec import Spkrec
89
-
90
- self.featExtractor = Spkrec(
91
- f"{store_name}_{self.feats_designation}",
92
- self.data_df,
93
- feats_type,
94
- )
95
- elif feats_type == "audmodel":
96
- from nkululeko.feat_extract.feats_audmodel import AudModelSet
97
-
98
- self.featExtractor = AudModelSet(
99
- f"{store_name}_{self.feats_designation}", self.data_df
100
- )
101
- elif feats_type == "auddim":
102
- from nkululeko.feat_extract.feats_audmodel_dim import (
103
- AudModelDimSet,
104
- )
105
-
106
- self.featExtractor = AudModelDimSet(
107
- f"{store_name}_{self.feats_designation}", self.data_df
108
- )
109
- elif feats_type == "agender":
110
- from nkululeko.feat_extract.feats_agender import (
111
- AudModelAgenderSet,
112
- )
113
-
114
- self.featExtractor = AudModelAgenderSet(
115
- f"{store_name}_{self.feats_designation}", self.data_df
116
- )
117
- elif feats_type == "agender_agender":
118
- from nkululeko.feat_extract.feats_agender_agender import (
119
- AgenderAgenderSet,
120
- )
121
-
122
- self.featExtractor = AgenderAgenderSet(
123
- f"{store_name}_{self.feats_designation}", self.data_df
124
- )
125
- elif feats_type == "snr":
126
- from nkululeko.feat_extract.feats_snr import SNRSet
127
-
128
- self.featExtractor = SNRSet(
129
- f"{store_name}_{self.feats_designation}", self.data_df
130
- )
131
- elif feats_type == "mos":
132
- from nkululeko.feat_extract.feats_mos import MOSSet
133
-
134
- self.featExtractor = MOSSet(
135
- f"{store_name}_{self.feats_designation}", self.data_df
136
- )
137
- elif feats_type == "squim":
138
- from nkululeko.feat_extract.feats_squim import SQUIMSet
139
-
140
- self.featExtractor = SQUIMSet(
141
- f"{store_name}_{self.feats_designation}", self.data_df
142
- )
143
- elif feats_type == "clap":
144
- from nkululeko.feat_extract.feats_clap import Clap
145
-
146
- self.featExtractor = Clap(
147
- f"{store_name}_{self.feats_designation}", self.data_df
148
- )
149
- elif feats_type == "praat":
150
- from nkululeko.feat_extract.feats_praat import Praatset
151
-
152
- self.featExtractor = Praatset(
153
- f"{store_name}_{self.feats_designation}", self.data_df
154
- )
155
- elif feats_type == "mld":
156
- from nkululeko.feat_extract.feats_mld import MLD_set
157
-
158
- self.featExtractor = MLD_set(
159
- f"{store_name}_{self.feats_designation}", self.data_df
160
- )
161
- elif feats_type == "import":
162
- from nkululeko.feat_extract.feats_import import Importset
163
-
164
- self.featExtractor = Importset(
165
- f"{store_name}_{self.feats_designation}", self.data_df
166
- )
167
- else:
168
- self.util.error(f"unknown feats_type: {feats_type}")
169
-
170
- self.featExtractor.extract()
171
- self.featExtractor.filter()
172
- # remove samples that were not extracted by MLD
173
- # self.df_test = self.df_test.loc[self.df_test.index.intersection(featExtractor_test.df.index)]
174
- # self.df_train = self.df_train.loc[self.df_train.index.intersection(featExtractor_train.df.index)]
175
- self.util.debug(f"{feats_type}: shape : {self.featExtractor.df.shape}")
176
- self.feats = pd.concat([self.feats, self.featExtractor.df], axis=1)
42
+ self.feat_extractor = self._get_feat_extractor(store_name, feats_type)
43
+ self.feat_extractor.extract()
44
+ self.feat_extractor.filter()
45
+ self.feats = pd.concat([self.feats, self.feat_extractor.df], axis=1)
177
46
  return self.feats
178
47
 
179
48
  def extract_sample(self, signal, sr):
180
- return self.featExtractor.extract_sample(signal, sr)
49
+ return self.feat_extractor.extract_sample(signal, sr)
50
+
51
+ def _get_feat_extractor(self, store_name, feats_type):
52
+ feat_extractor_class = self._get_feat_extractor_class(feats_type)
53
+ if feat_extractor_class is None:
54
+ self.util.error(f"unknown feats_type: {feats_type}")
55
+ return feat_extractor_class(
56
+ f"{store_name}_{self.feats_designation}", self.data_df
57
+ )
58
+
59
+ def _get_feat_extractor_class(self, feats_type):
60
+ if feats_type == "os":
61
+ from nkululeko.feat_extract.feats_opensmile import Opensmileset
62
+
63
+ return Opensmileset
64
+ elif feats_type == "spectra":
65
+ from nkululeko.feat_extract.feats_spectra import Spectraloader
66
+
67
+ return Spectraloader
68
+ elif feats_type == "trill":
69
+ from nkululeko.feat_extract.feats_trill import TRILLset
70
+
71
+ return TRILLset
72
+ elif feats_type.startswith(("wav2vec", "hubert", "wavlm", "spkrec")):
73
+ return self._get_feat_extractor_by_prefix(feats_type)
74
+ elif feats_type in (
75
+ "audmodel",
76
+ "auddim",
77
+ "agender",
78
+ "agender_agender",
79
+ "snr",
80
+ "mos",
81
+ "squim",
82
+ "clap",
83
+ "praat",
84
+ "mld",
85
+ "import",
86
+ ):
87
+ return self._get_feat_extractor_by_name(feats_type)
88
+ else:
89
+ return None
90
+
91
+ def _get_feat_extractor_by_prefix(self, feats_type):
92
+ prefix, _, ext = feats_type.partition("_")
93
+ from importlib import import_module
94
+
95
+ module = import_module(f"nkululeko.feat_extract.feats_{prefix.lower()}")
96
+ class_name = f"{prefix.capitalize()}{ext.capitalize()}set"
97
+ return getattr(module, class_name)
98
+
99
+ def _get_feat_extractor_by_name(self, feats_type):
100
+ from importlib import import_module
101
+
102
+ module = import_module(f"nkululeko.feat_extract.feats_{feats_type.lower()}")
103
+ class_name = f"{feats_type.capitalize()}Set"
104
+ return getattr(module, class_name)
nkululeko/modelrunner.py CHANGED
@@ -39,7 +39,7 @@ class Modelrunner:
39
39
  plot_epochs = self.util.config_val("PLOT", "epochs", False)
40
40
  only_test = self.util.config_val("MODEL", "only_test", False)
41
41
  epoch_num = int(self.util.config_val("EXP", "epochs", 1))
42
- if not self.model.is_ANN() and epoch_num > 1:
42
+ if not self.model.is_ann() and epoch_num > 1:
43
43
  self.util.warn(f"setting epoch num to 1 (was {epoch_num}) if model not ANN")
44
44
  epoch_num = 1
45
45
  glob_conf.config["EXP"]["epochs"] = "1"
@@ -69,7 +69,7 @@ class Modelrunner:
69
69
  if plot_epochs:
70
70
  self.util.debug(f"plotting conf matrix to {plot_name}")
71
71
  report.plot_confmatrix(plot_name, epoch)
72
- store_models = self.util.config_val("MODEL", "save", False)
72
+ store_models = self.util.config_val("EXP", "save", False)
73
73
  plot_best_model = self.util.config_val("PLOT", "best_model", False)
74
74
  if (store_models or plot_best_model) and (
75
75
  not only_test
nkululeko/models/model.py CHANGED
@@ -1,23 +1,25 @@
1
1
  # model.py
2
- from nkululeko.utils.util import Util
3
- import pandas as pd
4
- import numpy as np
5
- import nkululeko.glob_conf as glob_conf
6
- import sklearn.utils
7
- from nkululeko.reporting.reporter import Reporter
8
2
  import ast
9
- from sklearn.model_selection import GridSearchCV
10
3
  import pickle
11
4
  import random
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ from sklearn.model_selection import GridSearchCV
12
9
  from sklearn.model_selection import LeaveOneGroupOut
13
10
  from sklearn.model_selection import StratifiedKFold
11
+ import sklearn.utils
12
+
13
+ import nkululeko.glob_conf as glob_conf
14
+ from nkululeko.reporting.reporter import Reporter
15
+ from nkululeko.utils.util import Util
14
16
 
15
17
 
16
18
  class Model:
17
- """Generic model class for linear (non-neural) algorithms"""
19
+ """Generic model class for linear (non-neural) algorithms."""
18
20
 
19
21
  def __init__(self, df_train, df_test, feats_train, feats_test):
20
- """Constructor taking the configuration and all dataframes"""
22
+ """Constructor taking the configuration and all dataframes."""
21
23
  self.df_train, self.df_test, self.feats_train, self.feats_test = (
22
24
  df_train,
23
25
  df_test,
@@ -35,7 +37,7 @@ class Model:
35
37
  def set_model_type(self, type):
36
38
  self.model_type = type
37
39
 
38
- def is_ANN(self):
40
+ def is_ann(self):
39
41
  if self.model_type == "ann":
40
42
  return True
41
43
  else:
@@ -277,8 +279,6 @@ class Model:
277
279
  prediction = {}
278
280
  if self.util.exp_is_classification():
279
281
  # get the class probabilities
280
- if not self.get_type() == "xgb":
281
- features = [features]
282
282
  predictions = self.clf.predict_proba(features)
283
283
  # pred = self.clf.predict(features)
284
284
  for i in range(len(self.clf.classes_)):
@@ -302,7 +302,7 @@ class Model:
302
302
  self.clf = pickle.load(handle)
303
303
  except FileNotFoundError as fe:
304
304
  self.util.error(
305
- f"did you forget to store your models? needs: \n[MODEL]\nsave=True\n{fe}"
305
+ f"Did you forget to store your models? needs: \n[MODEL]\nsave=True\n{fe}"
306
306
  )
307
307
 
308
308
  def load_path(self, path, run, epoch):
@@ -25,6 +25,9 @@ class SVM_model(Model):
25
25
  class_weight=class_weight,
26
26
  ) # set up the classifier
27
27
 
28
- def set_C(self, c):
29
- """Set the C parameter"""
28
+ def set_c(self, c):
29
+ """Set the C parameter."""
30
30
  self.clf.C = c
31
+
32
+ def get_type(self):
33
+ return "svm"
nkululeko/multidb.py CHANGED
@@ -3,23 +3,27 @@
3
3
 
4
4
  import argparse
5
5
  import ast
6
- import seaborn as sn
7
- import pandas as pd
8
- import matplotlib.pyplot as plt
6
+ import configparser
7
+ import os
8
+
9
9
  import matplotlib.cm as cm
10
+ import matplotlib.pyplot as plt
10
11
  import numpy as np
11
- import os
12
+ import pandas as pd
13
+ import seaborn as sn
14
+
15
+ import nkululeko.glob_conf as glob_conf
16
+ from nkululeko.aug_train import doit as aug_train
12
17
  from nkululeko.experiment import Experiment
13
- import configparser
14
- from nkululeko.utils.util import Util
15
18
  from nkululeko.nkululeko import doit as nkulu
16
- from nkululeko.aug_train import doit as aug_train
17
- import nkululeko.glob_conf as glob_conf
19
+ from nkululeko.utils.util import Util
18
20
 
19
21
 
20
22
  def main(src_dir):
21
- parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
22
- parser.add_argument("--config", default="exp.ini", help="The base configuration")
23
+ parser = argparse.ArgumentParser(
24
+ description="Call the nkululeko MULTIDB framework.")
25
+ parser.add_argument("--config", default="exp.ini",
26
+ help="The base configuration")
23
27
  args = parser.parse_args()
24
28
  if args.config is not None:
25
29
  config_file = args.config
@@ -54,7 +58,8 @@ def main(src_dir):
54
58
  dataset = datasets[i]
55
59
  print(f"running {dataset}")
56
60
  if extra_trains:
57
- extra_trains_1 = extra_trains.removeprefix("[").removesuffix("]")
61
+ extra_trains_1 = extra_trains.removeprefix(
62
+ "[").removesuffix("]")
58
63
  config["DATA"]["databases"] = f"['{dataset}', {extra_trains_1}]"
59
64
  extra_trains_2 = ast.literal_eval(extra_trains)
60
65
  for extra_train in extra_trains_2:
@@ -67,7 +72,8 @@ def main(src_dir):
67
72
  test = datasets[j]
68
73
  print(f"running train: {train}, test: {test}")
69
74
  if extra_trains:
70
- extra_trains_1 = extra_trains.removeprefix("[").removesuffix("]")
75
+ extra_trains_1 = extra_trains.removeprefix(
76
+ "[").removesuffix("]")
71
77
  config["DATA"][
72
78
  "databases"
73
79
  ] = f"['{train}', '{test}', {extra_trains_1}]"
nkululeko/predict.py CHANGED
@@ -1,17 +1,34 @@
1
1
  # predict.py
2
- # use some model and add automatically predicted labels to train and test splits, than save as a new dataset
2
+ # use some model and add automatically predicted labels to train and test splits
3
+ # then save as a new dataset
4
+
5
+ """This script is used to call the nkululeko PREDICT framework.
6
+
7
+ It loads a configuration file, creates a new experiment,
8
+ and performs automatic prediction on the train and test datasets. The predicted labels are added to the datasets and
9
+ saved as a new dataset.
10
+
11
+ Usage: \n
12
+ python3 -m nkululeko.predict [--config CONFIG_FILE] \n
13
+
14
+ Arguments: \n
15
+ --config (str): The path to the base configuration file (default: exp.ini)
16
+ """
3
17
 
4
- from nkululeko.experiment import Experiment
5
- import configparser
6
- from nkululeko.utils.util import Util
7
- from nkululeko.constants import VERSION
8
18
  import argparse
19
+ import configparser
9
20
  import os
10
21
 
22
+ from nkululeko.constants import VERSION
23
+ from nkululeko.experiment import Experiment
24
+ from nkululeko.utils.util import Util
25
+
11
26
 
12
27
  def main(src_dir):
13
- parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
14
- parser.add_argument("--config", default="exp.ini", help="The base configuration")
28
+ parser = argparse.ArgumentParser(
29
+ description="Call the nkululeko PREDICT framework.")
30
+ parser.add_argument("--config", default="exp.ini",
31
+ help="The base configuration")
15
32
  args = parser.parse_args()
16
33
  if args.config is not None:
17
34
  config_file = args.config
@@ -28,7 +45,9 @@ def main(src_dir):
28
45
  config.read(config_file)
29
46
  # create a new experiment
30
47
  expr = Experiment(config)
31
- util = Util("predict")
48
+ module = "predict"
49
+ expr.set_module(module)
50
+ util = Util(module)
32
51
  util.debug(
33
52
  f"running {expr.name} from config {config_file}, nkululeko version"
34
53
  f" {VERSION}"
@@ -39,7 +58,8 @@ def main(src_dir):
39
58
 
40
59
  # split into train and test
41
60
  expr.fill_train_and_tests()
42
- util.debug(f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
61
+ util.debug(
62
+ f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
43
63
 
44
64
  # process the data
45
65
  df = expr.autopredict()