nkululeko 0.59.1__py3-none-any.whl → 0.61.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +1 -1
- nkululeko/experiment.py +43 -43
- nkululeko/feature_extractor.py +101 -58
- nkululeko/modelrunner.py +14 -14
- nkululeko/plots.py +11 -0
- nkululeko/segment.py +23 -27
- nkululeko/test_predictor.py +1 -1
- {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/METADATA +13 -1
- nkululeko-0.61.0.dist-info/RECORD +31 -0
- {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/WHEEL +1 -1
- nkululeko/ap_age.py +0 -31
- nkululeko/ap_arousal.py +0 -30
- nkululeko/ap_dominance.py +0 -29
- nkululeko/ap_gender.py +0 -29
- nkululeko/ap_mos.py +0 -35
- nkululeko/ap_pesq.py +0 -35
- nkululeko/ap_sdr.py +0 -36
- nkululeko/ap_snr.py +0 -35
- nkululeko/ap_stoi.py +0 -34
- nkululeko/ap_valence.py +0 -30
- nkululeko/augmenter.py +0 -64
- nkululeko/dataset.py +0 -415
- nkululeko/dataset_csv.py +0 -49
- nkululeko/dataset_ravdess.py +0 -19
- nkululeko/estimate_snr.py +0 -89
- nkululeko/feats_agender.py +0 -63
- nkululeko/feats_agender_agender.py +0 -65
- nkululeko/feats_analyser.py +0 -87
- nkululeko/feats_audmodel.py +0 -63
- nkululeko/feats_audmodel_dim.py +0 -63
- nkululeko/feats_clap.py +0 -74
- nkululeko/feats_import.py +0 -44
- nkululeko/feats_mld.py +0 -47
- nkululeko/feats_mos.py +0 -92
- nkululeko/feats_opensmile.py +0 -84
- nkululeko/feats_oxbow.py +0 -87
- nkululeko/feats_praat.py +0 -72
- nkululeko/feats_snr.py +0 -63
- nkululeko/feats_squim.py +0 -99
- nkululeko/feats_trill.py +0 -74
- nkululeko/feats_wav2vec2.py +0 -94
- nkululeko/featureset.py +0 -41
- nkululeko/feinberg_praat.py +0 -430
- nkululeko/loss_ccc.py +0 -28
- nkululeko/loss_softf1loss.py +0 -40
- nkululeko/model.py +0 -256
- nkululeko/model_bayes.py +0 -14
- nkululeko/model_cnn.py +0 -118
- nkululeko/model_gmm.py +0 -16
- nkululeko/model_knn.py +0 -16
- nkululeko/model_knn_reg.py +0 -16
- nkululeko/model_mlp.py +0 -175
- nkululeko/model_mlp_regression.py +0 -197
- nkululeko/model_svm.py +0 -18
- nkululeko/model_svr.py +0 -18
- nkululeko/model_tree.py +0 -14
- nkululeko/model_tree_reg.py +0 -14
- nkululeko/model_xgb.py +0 -12
- nkululeko/model_xgr.py +0 -12
- nkululeko/randomsplicer.py +0 -76
- nkululeko/randomsplicing.py +0 -74
- nkululeko-0.59.1.dist-info/RECORD +0 -82
- {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/LICENSE +0 -0
- {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/top_level.txt +0 -0
@@ -1,65 +0,0 @@
|
|
1
|
-
# feats_agender_agender.py
|
2
|
-
from nkululeko.featureset import Featureset
|
3
|
-
import os
|
4
|
-
import pandas as pd
|
5
|
-
import audeer
|
6
|
-
import nkululeko.glob_conf as glob_conf
|
7
|
-
import audonnx
|
8
|
-
import numpy as np
|
9
|
-
import audinterface
|
10
|
-
|
11
|
-
class AgenderAgenderSet(Featureset):
    """Age and gender predictions from a wav2vec2-based model fine-tuned on agender.

    The model is described in the paper
    "Speech-based Age and Gender Prediction with Transformers"
    https://arxiv.org/abs/2306.16962
    """

    # Model outputs that form the feature columns, in this order.
    _OUTPUTS = ['logits_age', 'logits_gender']

    def __init__(self, name, data_df):
        """Load the ONNX model, downloading and unpacking it first if it is missing.

        Args:
            name: feature set name, handed to the Featureset base class.
            data_df: dataframe whose index identifies the audio to process.
        """
        super().__init__(name, data_df)
        model_url = 'https://zenodo.org/record/7761387/files/w2v2-L-robust-6-age-gender.25c844af-1.1.1.zip'
        model_root = self.util.config_val('FEATS', 'agender.model', './audmodel_agender/')
        if not os.path.isdir(model_root):
            # First use: fetch the archive into ./cache and unpack it into model_root.
            cache_root = audeer.mkdir('cache')
            model_root = audeer.mkdir(model_root)
            archive_path = audeer.download_url(model_url, cache_root, verbose=True)
            audeer.extract_archive(archive_path, model_root)
        device = self.util.config_val('MODEL', 'device', 'cpu')
        self.model = audonnx.load(model_root, device=device)

    def extract(self):
        """Extract the features for the dataset, or load them from the store if present.

        The result is assigned to ``self.df``.
        """
        store = self.util.get_path('store')
        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
        storage = f'{store}{self.name}.{store_format}'
        # NOTE(review): eval() is applied to configuration text; only safe for trusted configs.
        extract = eval(self.util.config_val('FEATS', 'needs_feature_extraction', 'False'))
        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
        sampling_rate = 16000
        if no_reuse or extract or not os.path.isfile(storage):
            self.util.debug('extracting agender model age and gender, this might take a while...')
            outputs = list(self._OUTPUTS)
            logits = audinterface.Feature(
                self.model.labels(outputs),
                process_func=self.model,
                process_func_args={
                    'outputs': outputs,
                    'concat': True,
                },
                sampling_rate=sampling_rate,
                resample=True,
                verbose=True,
            )
            self.df = logits.process_index(self.data_df.index)
            self.util.write_store(self.df, storage, store_format)
            # NOTE(review): the flag is read from FEATS above but reset under DATA here;
            # confirm which section is intended before changing it.
            try:
                glob_conf.config['DATA']['needs_feature_extraction'] = 'False'
            except KeyError:
                pass
        else:
            self.util.debug('reusing extracted agender model features.')
            self.df = self.util.get_store(storage, store_format)

    def extract_sample(self, signal, sr):
        """Return the age and gender logits for one signal as a flat array.

        Uses the same outputs, in the same order, as ``extract`` so that
        single-sample features line up with the stored feature columns.

        Args:
            signal: audio samples to feed to the model.
            sr: sampling rate of ``signal``.
        """
        result = self.model(signal, sr)
        return np.concatenate(
            [np.asarray(result[output]).flatten() for output in self._OUTPUTS]
        )
|
nkululeko/feats_analyser.py
DELETED
@@ -1,87 +0,0 @@
|
|
1
|
-
# feats_analyser.py
|
2
|
-
import pandas as pd
|
3
|
-
from nkululeko.util import Util
|
4
|
-
from nkululeko.plots import Plots
|
5
|
-
from sklearn.linear_model import LogisticRegression
|
6
|
-
from sklearn.tree import DecisionTreeClassifier
|
7
|
-
from sklearn.linear_model import LinearRegression
|
8
|
-
from sklearn.tree import DecisionTreeRegressor
|
9
|
-
import matplotlib.pyplot as plt
|
10
|
-
|
11
|
-
class FeatureAnalyser:
    """Rank features by importance with a simple model and write plots and a report."""

    def __init__(self, label, df_labels, df_features):
        """Store labels and features for the analysis.

        Args:
            label: text used in the title of the importance plot.
            df_labels: dataframe that contains the configured target column.
            df_features: dataframe with one column per feature.
        """
        self.util = Util('feats_analyser')
        target = self.util.config_val('DATA', 'target', 'emotion')
        self.y = df_labels[target]
        self.df_labels = df_labels
        self.X = df_features
        self.label = label

    @staticmethod
    def _is_enabled(value):
        """Interpret a config value as a switch; explicit 'False'-like text disables it."""
        return str(value).strip().lower() not in ('', '0', 'false', 'no', 'none')

    def _compute_importance(self, model_s):
        """Fit the model named by model_s and return one importance value per feature.

        Returns None (after reporting the error) if model_s is not valid for
        the current kind of experiment.
        """
        if self.util.exp_is_classification():
            if model_s == 'log_reg':
                model = LogisticRegression()
                model.fit(self.X, self.y)
                return model.coef_[0]
            if model_s == 'tree':
                model = DecisionTreeClassifier()
                model.fit(self.X, self.y)
                # NOTE(review): eval() on configuration text; only safe for trusted configs.
                if eval(self.util.config_val('EXPL', 'plot_tree', 'False')):
                    Plots().plot_tree(model, self.X)
                return model.feature_importances_
        else:  # regression experiment
            if model_s == 'lin_reg':
                model = LinearRegression()
                model.fit(self.X, self.y)
                return model.coef_
            if model_s == 'tree':
                model = DecisionTreeRegressor()
                model.fit(self.X, self.y)
                return model.feature_importances_
        # Report the configured name; no model object exists on this path.
        self.util.error(f'invalid analysis method: {model_s}')
        return None

    def analyse(self):
        """Compute feature importance, then save a bar plot and a text/CSV report.

        Optionally plots the distribution of each top feature when
        EXPL.feature_distributions is enabled and the experiment is a classification.
        """
        model_s = self.util.config_val('EXPL', 'model', 'log_reg')
        max_feat_num = int(self.util.config_val('EXPL', 'max_feats', '10'))
        self.util.debug('analysing features...')
        importance = self._compute_importance(model_s)
        if importance is None:
            # Only reached if util.error() returns instead of aborting.
            return

        df_imp = pd.DataFrame({'feats': self.X.columns, 'importance': importance})
        df_imp = df_imp.sort_values(by='importance', ascending=False).iloc[:max_feat_num]
        ax = df_imp.plot(x='feats', y='importance', kind='bar')
        ax.set(title=f'{self.label} samples')
        plt.tight_layout()
        fig_dir = self.util.get_path('fig_dir') + '../'  # one up because of the runs
        exp_name = self.util.get_exp_name(only_data=True)
        plot_format = self.util.config_val('PLOT', 'format', 'png')
        filename = f'{fig_dir}{exp_name}EXPL_{model_s}.{plot_format}'
        plt.savefig(filename)
        fig = ax.figure
        fig.clear()
        plt.close(fig)

        # result file: a header line followed by the importance table
        res_dir = self.util.get_path('res_dir')
        file_name = f'{res_dir}{exp_name}EXPL_{model_s}.txt'
        with open(file_name, "w") as text_file:
            text_file.write(
                f'features in order of decreasing importance according to model {model_s}:\n'
                f'{str(df_imp.feats.values)}\n'
            )
        # append only after the header has been flushed and the file closed
        df_imp.to_csv(file_name, mode='a')

        # check if feature distributions should be plotted
        plot_feats = self._is_enabled(
            self.util.config_val('EXPL', 'feature_distributions', False)
        )
        if plot_feats:
            sample_selection = self.util.config_val('EXPL', 'sample_selection', 'all')
            if self.util.exp_is_classification():
                for feature in df_imp.feats:
                    _plots = Plots()
                    _plots.plot_feature(sample_selection, feature, 'class_label', self.df_labels, self.X)
            else:
                self.util.debug('can\'t plot feature distributions if not classification')
|
nkululeko/feats_audmodel.py
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
# feats_audmodel.py
|
2
|
-
from nkululeko.featureset import Featureset
|
3
|
-
import os
|
4
|
-
import pandas as pd
|
5
|
-
import audeer
|
6
|
-
import nkululeko.glob_conf as glob_conf
|
7
|
-
import audonnx
|
8
|
-
import numpy as np
|
9
|
-
import audinterface
|
10
|
-
|
11
|
-
class AudModelSet(Featureset):
    """Embeddings of a wav2vec2-based model fine-tuned on MSPPodcast emotions.

    Paper: "Dawn of the transformer era in speech emotion recognition:
    closing the valence gap", https://arxiv.org/abs/2203.07378
    """

    def __init__(self, name, data_df):
        """Load the ONNX model; download and unpack it when the model folder is absent."""
        super().__init__(name, data_df)
        url = 'https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip'
        root = self.util.config_val('FEATS', 'aud.model', './audmodel/')
        if not os.path.isdir(root):
            cache_dir = audeer.mkdir('cache')
            root = audeer.mkdir(root)
            audeer.extract_archive(
                audeer.download_url(url, cache_dir, verbose=True),
                root,
            )
        self.model = audonnx.load(
            root,
            device=self.util.config_val('MODEL', 'device', 'cpu'),
        )

    def extract(self):
        """Fill self.df with embeddings: computed fresh, or read back from the store."""
        store_dir = self.util.get_path('store')
        fmt = self.util.config_val('FEATS', 'store_format', 'pkl')
        target = f'{store_dir}{self.name}.{fmt}'
        forced = eval(self.util.config_val('FEATS', 'needs_feature_extraction', 'False'))
        fresh = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))

        # Cached result is usable only if nothing forces a new extraction.
        if not (fresh or forced) and os.path.isfile(target):
            self.util.debug('reusing extracted audmodel features.')
            self.df = self.util.get_store(target, fmt)
            return

        self.util.debug('extracting audmodel embeddings, this might take a while...')
        interface = audinterface.Feature(
            self.model.labels('hidden_states'),
            process_func=self.model,
            process_func_args={'outputs': 'hidden_states'},
            sampling_rate=16000,
            resample=True,
            num_workers=5,
            verbose=True,
        )
        self.df = interface.process_index(self.data_df.index)
        self.util.write_store(self.df, target, fmt)
        try:
            glob_conf.config['DATA']['needs_feature_extraction'] = 'False'
        except KeyError:
            # no DATA section to update
            pass

    def extract_sample(self, signal, sr):
        """Return the flattened hidden-state embedding of a single signal."""
        hidden = self.model(signal, sr)['hidden_states']
        return np.asarray(hidden.flatten())
|
nkululeko/feats_audmodel_dim.py
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
# feats_audmodel_dim.py
|
2
|
-
from nkululeko.featureset import Featureset
|
3
|
-
import os
|
4
|
-
import pandas as pd
|
5
|
-
import audeer
|
6
|
-
import nkululeko.glob_conf as glob_conf
|
7
|
-
import audonnx
|
8
|
-
import numpy as np
|
9
|
-
import audinterface
|
10
|
-
|
11
|
-
class AudModelDimSet(Featureset):
    """Emotional dimensions from a wav2vec2-based model fine-tuned on MSPPodcast emotions.

    The model is described in the paper
    "Dawn of the transformer era in speech emotion recognition: closing the valence gap"
    https://arxiv.org/abs/2203.07378
    """

    def __init__(self, name, data_df):
        """Load the ONNX model, downloading and unpacking it first if it is missing.

        Args:
            name: feature set name, handed to the Featureset base class.
            data_df: dataframe whose index identifies the audio to process.
        """
        super().__init__(name, data_df)
        model_url = 'https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip'
        model_root = self.util.config_val('FEATS', 'aud.model', './audmodel/')
        if not os.path.isdir(model_root):
            # First use: fetch the archive into ./cache and unpack it into model_root.
            cache_root = audeer.mkdir('cache')
            model_root = audeer.mkdir(model_root)
            archive_path = audeer.download_url(model_url, cache_root, verbose=True)
            audeer.extract_archive(archive_path, model_root)
        device = self.util.config_val('MODEL', 'device', 'cpu')
        self.model = audonnx.load(model_root, device=device)

    def extract(self):
        """Extract the features for the dataset, or load them from the store if present.

        The result is assigned to ``self.df``.
        """
        store = self.util.get_path('store')
        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
        storage = f'{store}{self.name}.{store_format}'
        # NOTE(review): eval() is applied to configuration text; only safe for trusted configs.
        extract = eval(self.util.config_val('FEATS', 'needs_feature_extraction', 'False'))
        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
        if no_reuse or extract or not os.path.isfile(storage):
            self.util.debug('extracting audmodel dimensions, this might take a while...')
            logits = audinterface.Feature(
                self.model.labels('logits'),
                process_func=self.model,
                process_func_args={
                    'outputs': 'logits',
                },
                sampling_rate=16000,
                resample=True,
                num_workers=5,
                verbose=True,
            )
            self.df = logits.process_index(self.data_df.index)
            self.util.write_store(self.df, storage, store_format)
            # NOTE(review): the flag is read from FEATS above but reset under DATA here;
            # confirm which section is intended before changing it.
            try:
                glob_conf.config['DATA']['needs_feature_extraction'] = 'False'
            except KeyError:
                pass
        else:
            self.util.debug('reusing extracted audmodel dimensions.')
            self.df = self.util.get_store(storage, store_format)

    def extract_sample(self, signal, sr):
        """Return the dimension logits for one signal as a flat array.

        Uses the 'logits' output, the same one ``extract`` stores, so that
        single-sample features match the stored feature columns.

        Args:
            signal: audio samples to feed to the model.
            sr: sampling rate of ``signal``.
        """
        result = self.model(signal, sr)
        return np.asarray(result['logits'].flatten())
|
nkululeko/feats_clap.py
DELETED
@@ -1,74 +0,0 @@
|
|
1
|
-
# feats_clap.py
|
2
|
-
|
3
|
-
from nkululeko.util import Util
|
4
|
-
from nkululeko.featureset import Featureset
|
5
|
-
import os
|
6
|
-
import pandas as pd
|
7
|
-
import os
|
8
|
-
import nkululeko.glob_conf as glob_conf
|
9
|
-
import laion_clap
|
10
|
-
import audiofile
|
11
|
-
|
12
|
-
class Clap(Featureset):
    """Class to extract laion's clap embeddings (https://github.com/LAION-AI/CLAP)"""

    def __init__(self, name, data_df):
        """Store the configured device; the model itself is loaded lazily.

        Args:
            name: feature set name, handed to the Featureset base class.
            data_df: dataframe whose index identifies the audio to process.
        """
        super().__init__(name, data_df)
        self.device = self.util.config_val('MODEL', 'device', 'cpu')
        self.model_initialized = False

    def init_model(self):
        """Load the CLAP model with its default pretrained checkpoint."""
        self.util.debug('loading clap model...')
        self.model = laion_clap.CLAP_Module(enable_fusion=False)
        self.model.load_ckpt()  # download the default pretrained checkpoint.
        # Remember the load so later calls do not repeat it.
        self.model_initialized = True
        self.util.debug('loaded clap model')

    def extract(self):
        """Extract the features or load them from disk if present."""
        store = self.util.get_path('store')
        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
        storage = f'{store}{self.name}.{store_format}'
        # Parse both switches the same way; an unparsed 'False' string would be truthy.
        # NOTE(review): eval() is applied to configuration text; only safe for trusted configs.
        extract = eval(self.util.config_val('FEATS', 'needs_feature_extraction', 'False'))
        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
        if extract or no_reuse or not os.path.isfile(storage):
            if not self.model_initialized:
                self.init_model()
            self.util.debug('extracting clap embeddings, this might take a while...')
            length = len(self.data_df.index)
            embeddings = []
            for idx, (file, start, end) in enumerate(self.data_df.index.to_list()):
                signal, sampling_rate = audiofile.read(
                    file,
                    offset=start.total_seconds(),
                    duration=(end - start).total_seconds(),
                    always_2d=True,
                )
                embeddings.append(self.get_embeddings(signal, sampling_rate))
                if idx % 10 == 0:
                    self.util.debug(f'Clap: {idx} of {length} done')
            self.df = pd.DataFrame(embeddings, index=self.data_df.index)
            self.util.write_store(self.df, storage, store_format)
            try:
                glob_conf.config['DATA']['needs_feature_extraction'] = 'false'
            except KeyError:
                pass
        else:
            self.util.debug('reusing extracted clap embeddings')
            self.df = self.util.get_store(storage, store_format)
            if self.df.isnull().values.any():
                nan_columns = self.df.columns[self.df.isna().any()].tolist()
                print(nan_columns)
                self.util.error(f'got nan: {self.df.shape} {self.df.isnull().sum().sum()}')

    def get_embeddings(self, signal, sampling_rate):
        """Return the CLAP embedding of one signal.

        The signal is written to a temporary wav file because the embedding
        is computed through the model's file-list interface.

        Args:
            signal: audio samples.
            sampling_rate: sampling rate of ``signal``; stored in the wav header.
        """
        tmp_audio_name = ['clap_audio_tmp.wav']
        # Write the true rate so the file is not mislabelled as 48 kHz.
        audiofile.write(tmp_audio_name[0], signal, sampling_rate)
        audio_embed = self.model.get_audio_embedding_from_filelist(x=tmp_audio_name, use_tensor=False)
        return audio_embed[0]

    def extract_sample(self, signal, sr):
        """Return the CLAP embedding for a single signal, loading the model on first use."""
        if not self.model_initialized:
            self.init_model()
        feats = self.get_embeddings(signal, sr)
        return feats
|
nkululeko/feats_import.py
DELETED
@@ -1,44 +0,0 @@
|
|
1
|
-
# feats_import.py
|
2
|
-
|
3
|
-
from nkululeko.util import Util
|
4
|
-
from nkululeko.featureset import Featureset
|
5
|
-
import os
|
6
|
-
import pandas as pd
|
7
|
-
import audformat
|
8
|
-
|
9
|
-
class Importset(Featureset):
    """Class to import features that have been compiled elsewhere"""

    def __init__(self, name, data_df):
        """Pass the feature set name and the data frame on to the Featureset base class."""
        super().__init__(name, data_df)

    def extract(self):
        """Import the features or load them from disk if present.

        The imported table is reduced to the rows of ``self.data_df.index``,
        pickled to the store and assigned to ``self.df``.
        """
        store = self.util.get_path('store')
        storage = f'{store}{self.name}.pkl'
        # String default: eval() needs text and would raise TypeError on a bool.
        # NOTE(review): eval() is applied to configuration text; only safe for trusted configs.
        extract = eval(self.util.config_val('FEATS', 'needs_feature_extraction', 'False'))
        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
        feat_import_file = self.util.config_val('FEATS', 'import_file', False)
        # Test for a configured value first: os.path.isfile(False) would probe
        # file descriptor 0 instead of a path.
        has_import_file = bool(feat_import_file) and os.path.isfile(feat_import_file)
        if not has_import_file:
            self.util.warn(f'no import file: {feat_import_file}')
        if extract or no_reuse or not os.path.isfile(storage):
            if not has_import_file:
                self.util.error(
                    f'cannot import features for {self.name}, no import file: {feat_import_file}'
                )
            self.util.debug(f'importing features for {self.name}')
            df = audformat.utils.read_csv(feat_import_file)
            # use only the rows from the data index
            df = df.loc[self.data_df.index]
            # in any case, store to disk for later use
            df.to_pickle(storage)
            # and assign to be the "official" feature set
            self.df = df
        else:
            self.util.debug('reusing imported features.')
            self.df = pd.read_pickle(storage)
|
nkululeko/feats_mld.py
DELETED
@@ -1,47 +0,0 @@
|
|
1
|
-
# feats_mld.py
|
2
|
-
from nkululeko.featureset import Featureset
|
3
|
-
import sys
|
4
|
-
import os
|
5
|
-
import pandas as pd
|
6
|
-
import numpy as np
|
7
|
-
from nkululeko.util import Util
|
8
|
-
import nkululeko.glob_conf as glob_conf
|
9
|
-
|
10
|
-
class MLD_set(Featureset):
    """Mid-level descriptor features computed by the external midlevel_descriptors module."""

    def __init__(self, name, data_df):
        """Make the configured midlevel_descriptors location importable.

        Args:
            name: feature set name, handed to the Featureset base class.
            data_df: dataframe describing the audio to process.
        """
        super().__init__(name, data_df)
        mld_path = self.util.config_val('FEATS', 'mld.model', None)
        # Only extend the import path when a location is actually configured.
        if mld_path is not None and mld_path not in sys.path:
            sys.path.append(mld_path)

    def extract(self):
        """Compute (or re-load from the cache file) the features into ``self.df``.

        NaN values are replaced by column means, and samples with fewer
        syllables than FEATS.min_syls are dropped when that option is set.
        """
        store = self.util.get_path('store')
        storage = f'{store}{self.name}.pkl'
        # NOTE(review): eval() is applied to configuration text; only safe for trusted configs.
        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
        cached = os.path.isfile(storage)
        if no_reuse and cached:
            # Drop the cache so the extractor below cannot reuse it.
            os.remove(storage)
            cached = False
        if cached:
            self.util.debug('reusing previously extracted midleveldescriptor features')
        else:
            self.util.debug('extracting midleveldescriptor features, this might take a while...')
        # Imported here because the module location is only known after __init__.
        import midlevel_descriptors as mld
        fex_mld = mld.MLD()
        self.df = fex_mld.extract_from_index(index=self.data_df, cache_path=storage)
        self.util.debug(f'MLD feats shape: {self.df.shape}')
        # shouldn't happen: replace NaNs with the column mean
        self.util.debug('MLD extractor: checking for NANs...')
        for col in self.df.columns:
            nan_count = self.df[col].isnull().sum()
            if nan_count:
                self.util.debug(f'{col} includes {nan_count} nan, inserting mean values')
                self.df[col] = self.df[col].fillna(self.df[col].mean())

        # use only samples that have a minimum number of syllables (optional)
        try:
            min_syls = int(glob_conf.config['FEATS']['min_syls'])
        except KeyError:
            min_syls = None
        if min_syls is not None:
            if 'hld_nSyl' in self.df.columns:
                self.df = self.df[self.df['hld_nSyl'] >= min_syls]
            else:
                self.util.debug('MLD extractor: no hld_nSyl column, min_syls filter skipped')
        if self.df.isna().to_numpy().any():
            self.util.error('feats 0: NANs exist')
        self.df = self.df.astype(float)
|
nkululeko/feats_mos.py
DELETED
@@ -1,92 +0,0 @@
|
|
1
|
-
""" feats_mos.py
|
2
|
-
predict MOS (mean opinion score)
|
3
|
-
|
4
|
-
adapted from
|
5
|
-
from https://pytorch.org/audio/main/tutorials/squim_tutorial.html#sphx-glr-tutorials-squim-tutorial-py
|
6
|
-
paper: https://arxiv.org/pdf/2304.01448.pdf
|
7
|
-
|
8
|
-
needs
|
9
|
-
pip uninstall -y torch torchvision torchaudio
|
10
|
-
pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
|
11
|
-
|
12
|
-
"""
|
13
|
-
from nkululeko.util import Util
|
14
|
-
from nkululeko.featureset import Featureset
|
15
|
-
import os
|
16
|
-
import pandas as pd
|
17
|
-
import os
|
18
|
-
import nkululeko.glob_conf as glob_conf
|
19
|
-
import audiofile
|
20
|
-
import torch
|
21
|
-
import torchaudio
|
22
|
-
from torchaudio.pipelines import SQUIM_SUBJECTIVE
|
23
|
-
from torchaudio.utils import download_asset
|
24
|
-
|
25
|
-
class MOSSet(Featureset):
    """Class to predict MOS (mean opinion score)"""

    def __init__(self, name, data_df):
        """Store the configured device; the model itself is loaded lazily.

        Args:
            name: feature set name, handed to the Featureset base class.
            data_df: dataframe whose index identifies the audio to process.
        """
        super().__init__(name, data_df)
        self.device = self.util.config_val('MODEL', 'device', 'cpu')
        self.model_initialized = False

    def init_model(self):
        """Load the SQUIM subjective model and the non-matching reference speech."""
        self.util.debug('loading MOS model...')
        self.subjective_model = SQUIM_SUBJECTIVE.get_model()
        NMR_SPEECH = download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
        self.WAVEFORM_NMR, SAMPLE_RATE_NMR = torchaudio.load(NMR_SPEECH)
        self.model_initialized = True

    def extract(self):
        """Extract the features or load them from disk if present."""
        store = self.util.get_path('store')
        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
        storage = f'{store}{self.name}.{store_format}'
        # Parse both switches the same way; an unparsed 'False' string would be truthy.
        # NOTE(review): eval() is applied to configuration text; only safe for trusted configs.
        extract = eval(self.util.config_val('FEATS', 'needs_feature_extraction', 'False'))
        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
        if extract or no_reuse or not os.path.isfile(storage):
            if not self.model_initialized:
                self.init_model()
            self.util.debug('predicting MOS, this might take a while...')
            length = len(self.data_df.index)
            mos_values = []
            for idx, (file, start, end) in enumerate(self.data_df.index.to_list()):
                signal, sampling_rate = audiofile.read(
                    file,
                    offset=start.total_seconds(),
                    duration=(end - start).total_seconds(),
                    always_2d=True,
                )
                mos_values.append(self.get_embeddings(signal, sampling_rate))
                if idx % 10 == 0:
                    self.util.debug(f'MOS: {idx} of {length} done')
            # A named column also works when there are no rows.
            self.df = pd.DataFrame({'mos': mos_values}, index=self.data_df.index)
            self.util.write_store(self.df, storage, store_format)
            try:
                glob_conf.config['DATA']['needs_feature_extraction'] = 'false'
            except KeyError:
                pass
        else:
            self.util.debug('reusing predicted MOS values')
            self.df = self.util.get_store(storage, store_format)
            if self.df.isnull().values.any():
                nan_columns = self.df.columns[self.df.isna().any()].tolist()
                print(nan_columns)
                self.util.error(f'got nan: {self.df.shape} {self.df.isnull().sum().sum()}')

    def get_embeddings(self, signal, sampling_rate):
        """Return the predicted MOS of one signal as a float.

        The signal is written to a temporary wav file and read back with
        torchaudio before it is passed to the model.
        NOTE(review): no resampling is done here — confirm the model's
        expected sampling rate against the torchaudio documentation.
        """
        tmp_audio_name = 'mos_audio_tmp.wav'
        audiofile.write(tmp_audio_name, signal, sampling_rate)
        WAVEFORM_SPEECH, SAMPLE_RATE_SPEECH = torchaudio.load(tmp_audio_name)
        with torch.no_grad():
            mos = self.subjective_model(WAVEFORM_SPEECH, self.WAVEFORM_NMR)
        return float(mos[0].numpy())

    def extract_sample(self, signal, sr):
        """Return the predicted MOS for a single signal, loading the model on first use."""
        if not self.model_initialized:
            self.init_model()
        feats = self.get_embeddings(signal, sr)
        return feats
|
nkululeko/feats_opensmile.py
DELETED
@@ -1,84 +0,0 @@
|
|
1
|
-
# feats_opensmile.py
|
2
|
-
from nkululeko.featureset import Featureset
|
3
|
-
import opensmile
|
4
|
-
import os
|
5
|
-
import pandas as pd
|
6
|
-
import nkululeko.glob_conf as glob_conf
|
7
|
-
import ast
|
8
|
-
|
9
|
-
class Opensmileset(Featureset):
    """Acoustic features computed with the opensmile package."""

    def __init__(self, name, data_df):
        """Resolve the configured opensmile feature set and feature level.

        Args:
            name: feature set name, handed to the Featureset base class.
            data_df: dataframe whose index identifies the audio to process.
        """
        super().__init__(name, data_df)
        self.featset = self.util.config_val('FEATS', 'set', 'eGeMAPSv02')
        try:
            # Attribute lookup instead of eval(): config text is never executed.
            # e.g. eGeMAPSv02, ComParE_2016, GeMAPSv01a, eGeMAPSv01a
            self.feature_set = getattr(opensmile.FeatureSet, self.featset)
        except AttributeError:
            self.util.error(f'something is wrong with feature set: {self.featset}')
        self.featlevel = self.util.config_val('FEATS', 'level', 'functionals')
        try:
            # map the short config spellings onto the opensmile member names
            self.featlevel = self.featlevel.replace('lld', 'LowLevelDescriptors')
            self.featlevel = self.featlevel.replace('functionals', 'Functionals')
            self.feature_level = getattr(opensmile.FeatureLevel, self.featlevel)
        except AttributeError:
            self.util.error(f'something is wrong with feature level: {self.featlevel}')

    def extract(self):
        """Extract the features based on the initialized dataset or re-open them when found on disk."""
        store = self.util.get_path('store')
        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
        storage = f'{store}{self.name}.{store_format}'
        # NOTE(review): eval() is applied to configuration text; only safe for trusted configs.
        extract = eval(self.util.config_val('FEATS', 'needs_feature_extraction', 'False'))
        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
        if extract or not os.path.isfile(storage) or no_reuse:
            self.util.debug('extracting openSmile features, this might take a while...')
            smile = opensmile.Smile(
                feature_set=self.feature_set,
                feature_level=self.feature_level,
                num_workers=5,
                verbose=True,
            )
            if isinstance(self.data_df.index, pd.MultiIndex):
                self.df = smile.process_index(self.data_df.index)
                self.df = self.df.set_index(self.data_df.index)
            else:
                self.df = smile.process_files(self.data_df.index)
                # Keep only the first index level (drops levels 1 and 2).
                self.df.index = self.df.index.droplevel([1, 2])
            self.util.write_store(self.df, storage, store_format)
            try:
                glob_conf.config['DATA']['needs_feature_extraction'] = 'False'
            except KeyError:
                pass
        else:
            self.util.debug(f'reusing extracted OS features: {storage}.')
            self.df = self.util.get_store(storage, store_format)

    def extract_sample(self, signal, sr):
        """Return the opensmile functionals of one signal as a numpy array.

        NOTE(review): this always uses the Functionals level, not the
        configured ``self.feature_level`` — confirm that is intended.
        """
        smile = opensmile.Smile(
            feature_set=self.feature_set,
            feature_level=opensmile.FeatureLevel.Functionals,
        )
        feats = smile.process_signal(signal, sr)
        return feats.to_numpy()

    def filter(self):
        """Restrict self.df to the rows of data_df and, if configured, to selected columns.

        The optional FEATS 'os.features' entry is a Python literal list of
        column names; names that are not present in self.df are ignored.
        """
        # use only the features that are indexed in the target dataframes
        self.df = self.df[self.df.index.isin(self.data_df.index)]
        try:
            selected_features = ast.literal_eval(glob_conf.config['FEATS']['os.features'])
        except KeyError:
            # no feature selection configured
            return
        self.util.debug(f'selecting features from opensmile: {selected_features}')
        # Keep configured order, drop duplicates and unknown names.
        available = [
            feat for feat in dict.fromkeys(selected_features)
            if feat in self.df.columns
        ]
        if available:
            self.df = self.df[available]
            self.util.debug(f'new feats shape after selecting opensmile features: {self.df.shape}')
|