nkululeko 0.59.1__py3-none-any.whl → 0.61.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. nkululeko/constants.py +1 -1
  2. nkululeko/experiment.py +43 -43
  3. nkululeko/feature_extractor.py +101 -58
  4. nkululeko/modelrunner.py +14 -14
  5. nkululeko/plots.py +11 -0
  6. nkululeko/segment.py +23 -27
  7. nkululeko/test_predictor.py +1 -1
  8. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/METADATA +13 -1
  9. nkululeko-0.61.0.dist-info/RECORD +31 -0
  10. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/WHEEL +1 -1
  11. nkululeko/ap_age.py +0 -31
  12. nkululeko/ap_arousal.py +0 -30
  13. nkululeko/ap_dominance.py +0 -29
  14. nkululeko/ap_gender.py +0 -29
  15. nkululeko/ap_mos.py +0 -35
  16. nkululeko/ap_pesq.py +0 -35
  17. nkululeko/ap_sdr.py +0 -36
  18. nkululeko/ap_snr.py +0 -35
  19. nkululeko/ap_stoi.py +0 -34
  20. nkululeko/ap_valence.py +0 -30
  21. nkululeko/augmenter.py +0 -64
  22. nkululeko/dataset.py +0 -415
  23. nkululeko/dataset_csv.py +0 -49
  24. nkululeko/dataset_ravdess.py +0 -19
  25. nkululeko/estimate_snr.py +0 -89
  26. nkululeko/feats_agender.py +0 -63
  27. nkululeko/feats_agender_agender.py +0 -65
  28. nkululeko/feats_analyser.py +0 -87
  29. nkululeko/feats_audmodel.py +0 -63
  30. nkululeko/feats_audmodel_dim.py +0 -63
  31. nkululeko/feats_clap.py +0 -74
  32. nkululeko/feats_import.py +0 -44
  33. nkululeko/feats_mld.py +0 -47
  34. nkululeko/feats_mos.py +0 -92
  35. nkululeko/feats_opensmile.py +0 -84
  36. nkululeko/feats_oxbow.py +0 -87
  37. nkululeko/feats_praat.py +0 -72
  38. nkululeko/feats_snr.py +0 -63
  39. nkululeko/feats_squim.py +0 -99
  40. nkululeko/feats_trill.py +0 -74
  41. nkululeko/feats_wav2vec2.py +0 -94
  42. nkululeko/featureset.py +0 -41
  43. nkululeko/feinberg_praat.py +0 -430
  44. nkululeko/loss_ccc.py +0 -28
  45. nkululeko/loss_softf1loss.py +0 -40
  46. nkululeko/model.py +0 -256
  47. nkululeko/model_bayes.py +0 -14
  48. nkululeko/model_cnn.py +0 -118
  49. nkululeko/model_gmm.py +0 -16
  50. nkululeko/model_knn.py +0 -16
  51. nkululeko/model_knn_reg.py +0 -16
  52. nkululeko/model_mlp.py +0 -175
  53. nkululeko/model_mlp_regression.py +0 -197
  54. nkululeko/model_svm.py +0 -18
  55. nkululeko/model_svr.py +0 -18
  56. nkululeko/model_tree.py +0 -14
  57. nkululeko/model_tree_reg.py +0 -14
  58. nkululeko/model_xgb.py +0 -12
  59. nkululeko/model_xgr.py +0 -12
  60. nkululeko/randomsplicer.py +0 -76
  61. nkululeko/randomsplicing.py +0 -74
  62. nkululeko-0.59.1.dist-info/RECORD +0 -82
  63. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/LICENSE +0 -0
  64. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/top_level.txt +0 -0
@@ -1,65 +0,0 @@
1
- # feats_agender_agender.py
2
- from nkululeko.featureset import Featureset
3
- import os
4
- import pandas as pd
5
- import audeer
6
- import nkululeko.glob_conf as glob_conf
7
- import audonnx
8
- import numpy as np
9
- import audinterface
10
-
11
class AgenderAgenderSet(Featureset):
    """Age and gender predictions from the wav2vec2 based model finetuned on agender.

    The model is described in the paper
    "Speech-based Age and Gender Prediction with Transformers"
    https://arxiv.org/abs/2306.16962
    """

    # model outputs that are concatenated to form one feature vector
    _OUTPUTS = ('logits_age', 'logits_gender')

    def __init__(self, name, data_df):
        """Load the ONNX model, fetching and unpacking the archive if the model folder is missing.

        Args:
            name: name of the feature set, used to build the store file name.
            data_df: dataframe whose index lists the samples to process.
        """
        super().__init__(name, data_df)
        model_url = 'https://zenodo.org/record/7761387/files/w2v2-L-robust-6-age-gender.25c844af-1.1.1.zip'
        model_root = self.util.config_val('FEATS', 'agender.model', './audmodel_agender/')
        if not os.path.isdir(model_root):
            cache_root = audeer.mkdir('cache')
            model_root = audeer.mkdir(model_root)
            archive_path = audeer.download_url(model_url, cache_root, verbose=True)
            audeer.extract_archive(archive_path, model_root)
        device = self.util.config_val('MODEL', 'device', 'cpu')
        self.model = audonnx.load(model_root, device=device)

    @staticmethod
    def _as_bool(value):
        """Interpret a configuration value as a boolean without calling eval() on it."""
        return str(value).strip().lower() in ('true', '1', 'yes')

    def extract(self):
        """Extract the features based on the initialized dataset or re-open them when found on disk."""
        store = self.util.get_path('store')
        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
        storage = f'{store}{self.name}.{store_format}'
        extract = self._as_bool(self.util.config_val('FEATS', 'needs_feature_extraction', 'False'))
        no_reuse = self._as_bool(self.util.config_val('FEATS', 'no_reuse', 'False'))
        if no_reuse or extract or not os.path.isfile(storage):
            self.util.debug('extracting agender model age and gender, this might take a while...')
            outputs = list(self._OUTPUTS)
            logits = audinterface.Feature(
                self.model.labels(outputs),
                process_func=self.model,
                process_func_args={
                    'outputs': outputs,
                    'concat': True,
                },
                sampling_rate=16000,
                resample=True,
                verbose=True,
            )
            self.df = logits.process_index(self.data_df.index)
            self.util.write_store(self.df, storage, store_format)
            try:
                glob_conf.config['DATA']['needs_feature_extraction'] = 'False'
            except KeyError:
                # no DATA section in the configuration: nothing to reset
                pass
        else:
            self.util.debug('reusing extracted audmodel features.')
            self.df = self.util.get_store(storage, store_format)

    def extract_sample(self, signal, sr):
        """Return the age/gender logits for one signal as a flat array.

        The same outputs as in extract() are requested, so a single sample
        lives in the same feature space as the stored features (the previous
        implementation returned the hidden states instead).

        Args:
            signal: the audio samples.
            sr: sampling rate of the signal.
        """
        result = self.model(signal, sr, outputs=list(self._OUTPUTS), concat=True)
        return np.asarray(result).flatten()
@@ -1,87 +0,0 @@
1
- # feats_analyser.py
2
- import pandas as pd
3
- from nkululeko.util import Util
4
- from nkululeko.plots import Plots
5
- from sklearn.linear_model import LogisticRegression
6
- from sklearn.tree import DecisionTreeClassifier
7
- from sklearn.linear_model import LinearRegression
8
- from sklearn.tree import DecisionTreeRegressor
9
- import matplotlib.pyplot as plt
10
-
11
class FeatureAnalyser:
    """Rank the features by importance with a simple model and report the result.

    The ranking is saved as a bar plot and as a text/csv result file. If
    configured, the value distributions of the top features are plotted too.
    """

    def __init__(self, label, df_labels, df_features):
        """Store the data to be analysed.

        Args:
            label: name of the sample selection, used in the plot title.
            df_labels: dataframe with the labels, must contain the target column.
            df_features: dataframe with one column per feature.
        """
        self.util = Util('feats_analyser')
        target = self.util.config_val('DATA', 'target', 'emotion')
        self.y = df_labels[target]
        self.df_labels = df_labels
        self.X = df_features
        self.label = label

    @staticmethod
    def _as_bool(value):
        """Interpret a configuration value as a boolean without calling eval() on it."""
        return str(value).strip().lower() in ('true', '1', 'yes')

    def analyse(self):
        """Compute the feature importance, then plot and store the top features."""
        model_s = self.util.config_val('EXPL', 'model', 'log_reg')
        max_feat_num = int(self.util.config_val('EXPL', 'max_feats', '10'))
        self.util.debug('analysing features...')
        importance = self._compute_importance(model_s)

        df_imp = pd.DataFrame({'feats': self.X.columns, 'importance': importance})
        df_imp = df_imp.sort_values(by='importance', ascending=False).iloc[:max_feat_num]
        exp_name = self.util.get_exp_name(only_data=True)
        self._plot_importance(df_imp, exp_name, model_s)
        self._write_results(df_imp, exp_name, model_s)

        # check if feature distributions should be plotted; the value comes
        # from the configuration as text, so 'False' must not count as true
        plot_feats = self._as_bool(self.util.config_val('EXPL', 'feature_distributions', False))
        if plot_feats:
            self._plot_feature_distributions(df_imp)

    def _compute_importance(self, model_s):
        """Fit the model named by model_s and return one importance value per feature."""
        importance = None
        if self.util.exp_is_classification():
            if model_s == 'log_reg':
                model = LogisticRegression()
                model.fit(self.X, self.y)
                # NOTE(review): only the first row of coefficients is used;
                # presumably that covers just one class for multi-class targets
                importance = model.coef_[0]
            elif model_s == 'tree':
                model = DecisionTreeClassifier()
                model.fit(self.X, self.y)
                importance = model.feature_importances_
                if self._as_bool(self.util.config_val('EXPL', 'plot_tree', 'False')):
                    plots = Plots()
                    plots.plot_tree(model, self.X)
            else:
                # report the configured name; no model object exists in this branch
                self.util.error(f'invalid analysis method: {model_s}')
        else:  # regression experiment
            if model_s == 'lin_reg':
                model = LinearRegression()
                model.fit(self.X, self.y)
                importance = model.coef_
            elif model_s == 'tree':
                model = DecisionTreeRegressor()
                model.fit(self.X, self.y)
                importance = model.feature_importances_
            else:
                self.util.error(f'invalid analysis method: {model_s}')
        return importance

    def _plot_importance(self, df_imp, exp_name, model_s):
        """Save the importance ranking as a bar plot and release the figure."""
        ax = df_imp.plot(x='feats', y='importance', kind='bar')
        ax.set(title=f'{self.label} samples')
        plt.tight_layout()
        fig_dir = self.util.get_path('fig_dir') + '../'  # one up because of the runs
        plot_format = self.util.config_val('PLOT', 'format', 'png')
        filename = f'{fig_dir}{exp_name}EXPL_{model_s}.{plot_format}'
        plt.savefig(filename)
        fig = ax.figure
        fig.clear()
        plt.close(fig)

    def _write_results(self, df_imp, exp_name, model_s):
        """Write the ranked feature names, followed by the ranking as csv, to the result file."""
        res_dir = self.util.get_path('res_dir')
        file_name = f'{res_dir}{exp_name}EXPL_{model_s}.txt'
        with open(file_name, 'w') as text_file:
            text_file.write(
                f'features in order of decreasing importance according to model {model_s}:\n'
                f'{str(df_imp.feats.values)}\n'
            )
        df_imp.to_csv(file_name, mode='a')

    def _plot_feature_distributions(self, df_imp):
        """Plot the distribution of each top feature per class (classification only)."""
        sample_selection = self.util.config_val('EXPL', 'sample_selection', 'all')
        if not self.util.exp_is_classification():
            self.util.debug('can\'t plot feature distributions if not classification')
            return
        for feature in df_imp.feats:
            # plot_feature(self, title, feature, label, df_labels, df_features):
            _plots = Plots()
            _plots.plot_feature(sample_selection, feature, 'class_label', self.df_labels, self.X)
@@ -1,63 +0,0 @@
1
- # feats_audmodel.py
2
- from nkululeko.featureset import Featureset
3
- import os
4
- import pandas as pd
5
- import audeer
6
- import nkululeko.glob_conf as glob_conf
7
- import audonnx
8
- import numpy as np
9
- import audinterface
10
-
11
class AudModelSet(Featureset):
    """Embeddings from the wav2vec2 based model finetuned on MSPPodcast emotions.

    The model is described in the paper
    "Dawn of the transformer era in speech emotion recognition: closing the valence gap"
    https://arxiv.org/abs/2203.07378
    """

    def __init__(self, name, data_df):
        """Load the ONNX model, fetching and unpacking the archive if the model folder is missing.

        Args:
            name: name of the feature set, used to build the store file name.
            data_df: dataframe whose index lists the samples to process.
        """
        super().__init__(name, data_df)
        model_url = 'https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip'
        model_root = self.util.config_val('FEATS', 'aud.model', './audmodel/')
        if not os.path.isdir(model_root):
            cache_root = audeer.mkdir('cache')
            model_root = audeer.mkdir(model_root)
            archive_path = audeer.download_url(model_url, cache_root, verbose=True)
            audeer.extract_archive(archive_path, model_root)
        device = self.util.config_val('MODEL', 'device', 'cpu')
        self.model = audonnx.load(model_root, device=device)

    @staticmethod
    def _as_bool(value):
        """Interpret a configuration value as a boolean without calling eval() on it."""
        return str(value).strip().lower() in ('true', '1', 'yes')

    def extract(self):
        """Extract the features based on the initialized dataset or re-open them when found on disk."""
        store = self.util.get_path('store')
        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
        storage = f'{store}{self.name}.{store_format}'
        extract = self._as_bool(self.util.config_val('FEATS', 'needs_feature_extraction', 'False'))
        no_reuse = self._as_bool(self.util.config_val('FEATS', 'no_reuse', 'False'))
        if no_reuse or extract or not os.path.isfile(storage):
            self.util.debug('extracting audmodel embeddings, this might take a while...')
            hidden_states = audinterface.Feature(
                self.model.labels('hidden_states'),
                process_func=self.model,
                process_func_args={
                    'outputs': 'hidden_states',
                },
                sampling_rate=16000,
                resample=True,
                num_workers=5,
                verbose=True,
            )
            self.df = hidden_states.process_index(self.data_df.index)
            self.util.write_store(self.df, storage, store_format)
            try:
                glob_conf.config['DATA']['needs_feature_extraction'] = 'False'
            except KeyError:
                # no DATA section in the configuration: nothing to reset
                pass
        else:
            self.util.debug('reusing extracted audmodel features.')
            self.df = self.util.get_store(storage, store_format)

    def extract_sample(self, signal, sr):
        """Return the hidden states for one signal as a flat array.

        Args:
            signal: the audio samples.
            sr: sampling rate of the signal.
        """
        result = self.model(signal, sr)
        return np.asarray(result['hidden_states'].flatten())
@@ -1,63 +0,0 @@
1
- # feats_audmodel_dim.py
2
- from nkululeko.featureset import Featureset
3
- import os
4
- import pandas as pd
5
- import audeer
6
- import nkululeko.glob_conf as glob_conf
7
- import audonnx
8
- import numpy as np
9
- import audinterface
10
-
11
class AudModelDimSet(Featureset):
    """Emotional dimensions from the wav2vec2 based model finetuned on MSPPodcast emotions.

    The model is described in the paper
    "Dawn of the transformer era in speech emotion recognition: closing the valence gap"
    https://arxiv.org/abs/2203.07378
    """

    def __init__(self, name, data_df):
        """Load the ONNX model, fetching and unpacking the archive if the model folder is missing.

        Args:
            name: name of the feature set, used to build the store file name.
            data_df: dataframe whose index lists the samples to process.
        """
        super().__init__(name, data_df)
        model_url = 'https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip'
        model_root = self.util.config_val('FEATS', 'aud.model', './audmodel/')
        if not os.path.isdir(model_root):
            cache_root = audeer.mkdir('cache')
            model_root = audeer.mkdir(model_root)
            archive_path = audeer.download_url(model_url, cache_root, verbose=True)
            audeer.extract_archive(archive_path, model_root)
        device = self.util.config_val('MODEL', 'device', 'cpu')
        self.model = audonnx.load(model_root, device=device)

    @staticmethod
    def _as_bool(value):
        """Interpret a configuration value as a boolean without calling eval() on it."""
        return str(value).strip().lower() in ('true', '1', 'yes')

    def extract(self):
        """Extract the features based on the initialized dataset or re-open them when found on disk."""
        store = self.util.get_path('store')
        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
        storage = f'{store}{self.name}.{store_format}'
        extract = self._as_bool(self.util.config_val('FEATS', 'needs_feature_extraction', 'False'))
        no_reuse = self._as_bool(self.util.config_val('FEATS', 'no_reuse', 'False'))
        if no_reuse or extract or not os.path.isfile(storage):
            self.util.debug('extracting audmodel dimensions, this might take a while...')
            logits = audinterface.Feature(
                self.model.labels('logits'),
                process_func=self.model,
                process_func_args={
                    'outputs': 'logits',
                },
                sampling_rate=16000,
                resample=True,
                num_workers=5,
                verbose=True,
            )
            self.df = logits.process_index(self.data_df.index)
            self.util.write_store(self.df, storage, store_format)
            try:
                glob_conf.config['DATA']['needs_feature_extraction'] = 'False'
            except KeyError:
                # no DATA section in the configuration: nothing to reset
                pass
        else:
            self.util.debug('reusing extracted audmodel dimensions.')
            self.df = self.util.get_store(storage, store_format)

    def extract_sample(self, signal, sr):
        """Return the 'logits' output for one signal as a flat array.

        This is the same output that extract() stores, so a single sample
        lives in the same feature space as the stored features (the previous
        implementation returned the hidden states instead).

        Args:
            signal: the audio samples.
            sr: sampling rate of the signal.
        """
        result = self.model(signal, sr, outputs='logits')
        return np.asarray(result).flatten()
nkululeko/feats_clap.py DELETED
@@ -1,74 +0,0 @@
1
- # feats_clap.py
2
-
3
- from nkululeko.util import Util
4
- from nkululeko.featureset import Featureset
5
- import os
6
- import pandas as pd
7
- import os
8
- import nkululeko.glob_conf as glob_conf
9
- import laion_clap
10
- import audiofile
11
-
12
class Clap(Featureset):
    """Class to extract laion's clap embeddings (https://github.com/LAION-AI/CLAP)"""

    def __init__(self, name, data_df):
        """Store the configuration; the model itself is loaded lazily by init_model().

        Args:
            name: name of the feature set, used to build the store file name.
            data_df: dataframe whose index lists the samples as (file, start, end).
        """
        super().__init__(name, data_df)
        self.device = self.util.config_val('MODEL', 'device', 'cpu')
        self.model_initialized = False

    @staticmethod
    def _as_bool(value):
        """Interpret a configuration value as a boolean without calling eval() on it."""
        return str(value).strip().lower() in ('true', '1', 'yes')

    def init_model(self):
        """Load the clap model with its default pretrained checkpoint."""
        self.util.debug('loading clap model...')
        self.model = laion_clap.CLAP_Module(enable_fusion=False)
        self.model.load_ckpt()  # download the default pretrained checkpoint.
        # remember the load so that later calls do not fetch the model again
        self.model_initialized = True
        self.util.debug('loaded clap model')

    def extract(self):
        """Extract the features or load them from disk if present."""
        store = self.util.get_path('store')
        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
        storage = f'{store}{self.name}.{store_format}'
        # configuration values arrive as text, so 'False' must not count as true
        extract = self._as_bool(self.util.config_val('FEATS', 'needs_feature_extraction', False))
        no_reuse = self._as_bool(self.util.config_val('FEATS', 'no_reuse', 'False'))
        if extract or no_reuse or not os.path.isfile(storage):
            if not self.model_initialized:
                self.init_model()
            self.util.debug('extracting clap embeddings, this might take a while...')
            length = len(self.data_df.index)
            embeddings = []
            for idx, (file, start, end) in enumerate(self.data_df.index.to_list()):
                signal, sampling_rate = audiofile.read(
                    file,
                    offset=start.total_seconds(),
                    duration=(end - start).total_seconds(),
                    always_2d=True,
                )
                embeddings.append(self.get_embeddings(signal, sampling_rate))
                if idx % 10 == 0:
                    self.util.debug(f'Clap: {idx} of {length} done')
            self.df = pd.DataFrame(embeddings, index=self.data_df.index)
            self.util.write_store(self.df, storage, store_format)
            try:
                glob_conf.config['DATA']['needs_feature_extraction'] = 'False'
            except KeyError:
                # no DATA section in the configuration: nothing to reset
                pass
        else:
            self.util.debug('reusing extracted clap embeddings')
            self.df = self.util.get_store(storage, store_format)
            if self.df.isnull().values.any():
                nan_columns = self.df.columns[self.df.isna().any()].tolist()
                self.util.debug(f'columns with nan: {nan_columns}')
                self.util.error(f'got nan: {self.df.shape} {self.df.isnull().sum().sum()}')

    def get_embeddings(self, signal, sampling_rate):
        """Return the clap embedding for one signal.

        The signal is written to a temporary wav file because the model is
        queried via a file list. The file is written with the real sampling
        rate of the signal; labelling it as 48 kHz regardless of the real rate
        would change speed and pitch of the audio.

        Args:
            signal: the audio samples.
            sampling_rate: sampling rate of the signal.
        """
        tmp_audio_name = ['clap_audio_tmp.wav']
        audiofile.write(tmp_audio_name[0], signal, sampling_rate)
        audio_embed = self.model.get_audio_embedding_from_filelist(x=tmp_audio_name, use_tensor=False)
        return audio_embed[0]

    def extract_sample(self, signal, sr):
        """Return the clap embedding for a single signal, loading the model on first use."""
        if not self.model_initialized:
            self.init_model()
        return self.get_embeddings(signal, sr)
nkululeko/feats_import.py DELETED
@@ -1,44 +0,0 @@
1
- # feats_import.py
2
-
3
- from nkululeko.util import Util
4
- from nkululeko.featureset import Featureset
5
- import os
6
- import pandas as pd
7
- import audformat
8
-
9
class Importset(Featureset):
    """Class to import features that have been compiled elsewhere"""

    def __init__(self, name, data_df):
        """Store name and data.

        Args:
            name: name of the feature set, used to build the store file name.
            data_df: dataframe whose index selects the rows to import.
        """
        super().__init__(name, data_df)

    @staticmethod
    def _as_bool(value):
        """Interpret a configuration value as a boolean without calling eval() on it."""
        return str(value).strip().lower() in ('true', '1', 'yes')

    def extract(self):
        """Import the features or load them from disk if present."""
        store = self.util.get_path('store')
        storage = f'{store}{self.name}.pkl'
        # the defaults are handled here as well: eval(False) used to raise a
        # TypeError when the key was missing from the configuration
        extract = self._as_bool(self.util.config_val('FEATS', 'needs_feature_extraction', 'False'))
        no_reuse = self._as_bool(self.util.config_val('FEATS', 'no_reuse', 'False'))
        feat_import_file = self.util.config_val('FEATS', 'import_file', False)
        # test for a missing entry first: os.path.isfile(False) would probe
        # file descriptor 0 instead of a path
        import_file_found = bool(feat_import_file) and os.path.isfile(feat_import_file)
        if not import_file_found:
            self.util.warn(f'no import file: {feat_import_file}')
        if extract or no_reuse or not os.path.isfile(storage):
            if not feat_import_file:
                self.util.error('no import file configured (FEATS.import_file)')
            self.util.debug(f'importing features for {self.name}')
            df = audformat.utils.read_csv(feat_import_file)
            # use only the rows from the data index
            df = df.loc[self.data_df.index]
            # in any case, store to disk for later use
            df.to_pickle(storage)
            # and assign to be the "official" feature set
            self.df = df
        else:
            self.util.debug('reusing imported features.')
            self.df = pd.read_pickle(storage)
nkululeko/feats_mld.py DELETED
@@ -1,47 +0,0 @@
1
- # feats_mld.py
2
- from nkululeko.featureset import Featureset
3
- import sys
4
- import os
5
- import pandas as pd
6
- import numpy as np
7
- from nkululeko.util import Util
8
- import nkululeko.glob_conf as glob_conf
9
-
10
class MLD_set(Featureset):
    """Feature set based on the external midlevel_descriptors package."""

    def __init__(self, name, data_df):
        """Make the midlevel_descriptors package importable.

        Args:
            name: name of the feature set, used to build the store file name.
            data_df: dataframe with the samples to process.
        """
        super().__init__(name, data_df)
        mld_path = self.util.config_val('FEATS', 'mld.model', None)
        # only extend the search path if a location is configured; without
        # one the package has to be importable already
        if mld_path is not None and mld_path not in sys.path:
            sys.path.append(mld_path)

    def extract(self):
        """Extract the mid level descriptors; the extractor caches them at the store path."""
        store = self.util.get_path('store')
        storage = f'{store}{self.name}.pkl'
        no_reuse = str(self.util.config_val('FEATS', 'no_reuse', 'False')).strip().lower() in ('true', '1', 'yes')
        # drop the cache to force a new extraction; there may be none yet
        if no_reuse and os.path.isfile(storage):
            os.remove(storage)
        if not os.path.isfile(storage):
            self.util.debug('extracting midleveldescriptor features, this might take a while...')
        else:
            self.util.debug('reusing previously extracted midleveldescriptor features')
        import midlevel_descriptors as mld
        fex_mld = mld.MLD()
        self.df = fex_mld.extract_from_index(index=self.data_df, cache_path=storage)
        self.util.debug(f'MLD feats shape: {self.df.shape}')
        # shouldn't happen
        # replace NaNs with the mean value of the column
        self.util.debug('MLD extractor: checking for NANs...')
        for col in self.df.columns:
            if self.df[col].isnull().values.any():
                self.util.debug(f'{col} includes {self.df[col].isnull().sum()} nan, inserting mean values')
                self.df[col] = self.df[col].fillna(self.df[col].mean())

        try:
            # use only samples that have a minimum number of syllables
            min_syls = int(glob_conf.config['FEATS']['min_syls'])
            self.df = self.df[self.df['hld_nSyl'] >= min_syls]
        except KeyError:
            # no minimum configured (or no syllable column): keep all samples
            pass
        if self.df.isna().to_numpy().any():
            self.util.error('feats 0: NANs exist')
        self.df = self.df.astype(float)
nkululeko/feats_mos.py DELETED
@@ -1,92 +0,0 @@
1
- """ feats_mos.py
2
- predict MOS (mean opinion score)
3
-
4
- adapted from
5
- https://pytorch.org/audio/main/tutorials/squim_tutorial.html#sphx-glr-tutorials-squim-tutorial-py
6
- paper: https://arxiv.org/pdf/2304.01448.pdf
7
-
8
- needs
9
- pip uninstall -y torch torchvision torchaudio
10
- pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
11
-
12
- """
13
- from nkululeko.util import Util
14
- from nkululeko.featureset import Featureset
15
- import os
16
- import pandas as pd
17
- import os
18
- import nkululeko.glob_conf as glob_conf
19
- import audiofile
20
- import torch
21
- import torchaudio
22
- from torchaudio.pipelines import SQUIM_SUBJECTIVE
23
- from torchaudio.utils import download_asset
24
-
25
class MOSSet(Featureset):
    """Class to predict MOS (mean opinion score)"""

    # sampling rate used by the SQUIM tutorial this code was adapted from
    _MODEL_SAMPLING_RATE = 16000

    def __init__(self, name, data_df):
        """Store the configuration; the model itself is loaded lazily by init_model().

        Args:
            name: name of the feature set, used to build the store file name.
            data_df: dataframe whose index lists the samples as (file, start, end).
        """
        super().__init__(name, data_df)
        self.device = self.util.config_val('MODEL', 'device', 'cpu')
        self.model_initialized = False

    @staticmethod
    def _as_bool(value):
        """Interpret a configuration value as a boolean without calling eval() on it."""
        return str(value).strip().lower() in ('true', '1', 'yes')

    def init_model(self):
        """Load the subjective SQUIM model and the non-matching reference speech."""
        self.util.debug('loading MOS model...')
        self.subjective_model = SQUIM_SUBJECTIVE.get_model()
        nmr_speech = download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
        self.WAVEFORM_NMR, sample_rate_nmr = torchaudio.load(nmr_speech)
        if sample_rate_nmr != self._MODEL_SAMPLING_RATE:
            self.WAVEFORM_NMR = torchaudio.functional.resample(
                self.WAVEFORM_NMR, sample_rate_nmr, self._MODEL_SAMPLING_RATE
            )
        self.model_initialized = True

    def extract(self):
        """Extract the features or load them from disk if present."""
        store = self.util.get_path('store')
        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
        storage = f'{store}{self.name}.{store_format}'
        # configuration values arrive as text, so 'False' must not count as true
        extract = self._as_bool(self.util.config_val('FEATS', 'needs_feature_extraction', False))
        no_reuse = self._as_bool(self.util.config_val('FEATS', 'no_reuse', 'False'))
        if extract or no_reuse or not os.path.isfile(storage):
            if not self.model_initialized:
                self.init_model()
            self.util.debug('predicting MOS, this might take a while...')
            length = len(self.data_df.index)
            scores = []
            for idx, (file, start, end) in enumerate(self.data_df.index.to_list()):
                signal, sampling_rate = audiofile.read(
                    file,
                    offset=start.total_seconds(),
                    duration=(end - start).total_seconds(),
                    always_2d=True,
                )
                scores.append(self.get_embeddings(signal, sampling_rate))
                if idx % 10 == 0:
                    self.util.debug(f'MOS: {idx} of {length} done')
            self.df = pd.DataFrame(scores, index=self.data_df.index)
            self.df.columns = ['mos']
            self.util.write_store(self.df, storage, store_format)
            try:
                glob_conf.config['DATA']['needs_feature_extraction'] = 'False'
            except KeyError:
                # no DATA section in the configuration: nothing to reset
                pass
        else:
            self.util.debug('reusing predicted MOS values')
            self.df = self.util.get_store(storage, store_format)
            if self.df.isnull().values.any():
                nan_columns = self.df.columns[self.df.isna().any()].tolist()
                self.util.debug(f'columns with nan: {nan_columns}')
                self.util.error(f'got nan: {self.df.shape} {self.df.isnull().sum().sum()}')

    def get_embeddings(self, signal, sampling_rate):
        """Return the predicted MOS for one signal as a float.

        The signal takes a detour via a temporary wav file to obtain it as a
        torchaudio waveform, and is resampled if its rate differs from
        _MODEL_SAMPLING_RATE.

        Args:
            signal: the audio samples.
            sampling_rate: sampling rate of the signal.
        """
        tmp_audio_name = 'mos_audio_tmp.wav'
        audiofile.write(tmp_audio_name, signal, sampling_rate)
        waveform_speech, sample_rate_speech = torchaudio.load(tmp_audio_name)
        if sample_rate_speech != self._MODEL_SAMPLING_RATE:
            waveform_speech = torchaudio.functional.resample(
                waveform_speech, sample_rate_speech, self._MODEL_SAMPLING_RATE
            )
        with torch.no_grad():
            mos = self.subjective_model(waveform_speech, self.WAVEFORM_NMR)
        return float(mos[0].numpy())

    def extract_sample(self, signal, sr):
        """Return the predicted MOS for a single signal, loading the model on first use."""
        if not self.model_initialized:
            self.init_model()
        return self.get_embeddings(signal, sr)
@@ -1,84 +0,0 @@
1
- # feats_opensmile.py
2
- from nkululeko.featureset import Featureset
3
- import opensmile
4
- import os
5
- import pandas as pd
6
- import nkululeko.glob_conf as glob_conf
7
- import ast
8
-
9
class Opensmileset(Featureset):
    """Acoustic features extracted with openSMILE."""

    def __init__(self, name, data_df):
        """Resolve the configured feature set and feature level.

        Args:
            name: name of the feature set, used to build the store file name.
            data_df: dataframe whose index lists the samples to process.
        """
        super().__init__(name, data_df)
        self.featset = self.util.config_val('FEATS', 'set', 'eGeMAPSv02')
        try:
            # look the member up by name instead of evaluating configuration text,
            # e.g. eGeMAPSv02, ComParE_2016, GeMAPSv01a, eGeMAPSv01a
            self.feature_set = getattr(opensmile.FeatureSet, self.featset)
        except AttributeError:
            self.util.error(f'something is wrong with feature set: {self.featset}')
        self.featlevel = self.util.config_val('FEATS', 'level', 'functionals')
        try:
            # map the short configuration names to the opensmile member names
            self.featlevel = self.featlevel.replace('lld', 'LowLevelDescriptors')
            self.featlevel = self.featlevel.replace('functionals', 'Functionals')
            self.feature_level = getattr(opensmile.FeatureLevel, self.featlevel)
        except AttributeError:
            self.util.error(f'something is wrong with feature level: {self.featlevel}')

    @staticmethod
    def _as_bool(value):
        """Interpret a configuration value as a boolean without calling eval() on it."""
        return str(value).strip().lower() in ('true', '1', 'yes')

    def extract(self):
        """Extract the features based on the initialized dataset or re-open them when found on disk."""
        store = self.util.get_path('store')
        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
        storage = f'{store}{self.name}.{store_format}'
        extract = self._as_bool(self.util.config_val('FEATS', 'needs_feature_extraction', 'False'))
        no_reuse = self._as_bool(self.util.config_val('FEATS', 'no_reuse', 'False'))
        if extract or not os.path.isfile(storage) or no_reuse:
            self.util.debug('extracting openSmile features, this might take a while...')
            smile = opensmile.Smile(
                feature_set=self.feature_set,
                feature_level=self.feature_level,
                num_workers=5,
                verbose=True,
            )
            if isinstance(self.data_df.index, pd.MultiIndex):
                self.df = smile.process_index(self.data_df.index)
                self.df = self.df.set_index(self.data_df.index)
            else:
                self.df = smile.process_files(self.data_df.index)
                # drop the second index level twice so that only the first
                # level of the result index remains
                self.df.index = self.df.index.droplevel(1)
                self.df.index = self.df.index.droplevel(1)
            self.util.write_store(self.df, storage, store_format)
            try:
                glob_conf.config['DATA']['needs_feature_extraction'] = 'False'
            except KeyError:
                # no DATA section in the configuration: nothing to reset
                pass
        else:
            self.util.debug(f'reusing extracted OS features: {storage}.')
            self.df = self.util.get_store(storage, store_format)

    def extract_sample(self, signal, sr):
        """Return the functionals of the configured feature set for one signal.

        NOTE(review): the level is fixed to functionals here, independent of
        the configured feature level - confirm that this is intended.

        Args:
            signal: the audio samples.
            sr: sampling rate of the signal.
        """
        smile = opensmile.Smile(
            feature_set=self.feature_set,
            feature_level=opensmile.FeatureLevel.Functionals,
        )
        feats = smile.process_signal(signal, sr)
        return feats.to_numpy()

    def filter(self):
        """Restrict the features to the samples of the data and, if configured, to selected columns."""
        # use only the features that are indexed in the target dataframes
        self.df = self.df[self.df.index.isin(self.data_df.index)]
        try:
            # use only some features
            selected_features = ast.literal_eval(glob_conf.config['FEATS']['os.features'])
        except KeyError:
            # no selection configured: keep all features
            return
        self.util.debug(f'selecting features from opensmile: {selected_features}')
        # keep the configured order, skip unknown names and repeated entries
        present = [feat for feat in dict.fromkeys(selected_features) if feat in self.df.columns]
        if present:
            self.df = self.df[present].copy()
            self.util.debug(f'new feats shape after selecting opensmile features: {self.df.shape}')