nkululeko 0.56.0__py3-none-any.whl → 0.58.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/ap_arousal.py +30 -0
- nkululeko/ap_dominance.py +29 -0
- nkululeko/ap_gender.py +0 -2
- nkululeko/ap_mos.py +35 -0
- nkululeko/ap_pesq.py +35 -0
- nkululeko/ap_snr.py +1 -1
- nkululeko/ap_valence.py +30 -0
- nkululeko/constants.py +1 -1
- nkululeko/dataset.py +0 -1
- nkululeko/experiment.py +26 -3
- nkululeko/feats_mos.py +92 -0
- nkululeko/feats_pesq.py +89 -0
- nkululeko/feature_extractor.py +6 -0
- nkululeko/{autopredict.py → predict.py} +7 -3
- {nkululeko-0.56.0.dist-info → nkululeko-0.58.0.dist-info}/METADATA +22 -3
- {nkululeko-0.56.0.dist-info → nkululeko-0.58.0.dist-info}/RECORD +19 -12
- {nkululeko-0.56.0.dist-info → nkululeko-0.58.0.dist-info}/LICENSE +0 -0
- {nkululeko-0.56.0.dist-info → nkululeko-0.58.0.dist-info}/WHEEL +0 -0
- {nkululeko-0.56.0.dist-info → nkululeko-0.58.0.dist-info}/top_level.txt +0 -0
nkululeko/ap_arousal.py
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
""""
|
2
|
+
A predictor for emotional arousal.
|
3
|
+
Currently based on audEERING's emotional dimension model.
|
4
|
+
"""
|
5
|
+
from nkululeko.util import Util
|
6
|
+
from nkululeko.feature_extractor import FeatureExtractor
|
7
|
+
import ast
|
8
|
+
import nkululeko.glob_conf as glob_conf
|
9
|
+
class ArousalPredictor:
    """Adds an automatically predicted arousal column to a dataframe.

    The values are taken from the ``arousal`` column delivered by the
    ``auddim`` feature extractor (audEERING emotional dimension model).
    """

    def __init__(self, df):
        """Remember the dataframe to be labelled and create a logger."""
        self.df = df
        self.util = Util('arousalPredictor')

    def predict(self, split_selection):
        """Return a copy of the dataframe with an added ``arousal_pred`` column.

        Args:
            split_selection: designation of the sample selection; it is
                logged and handed on to the feature extractor.
        """
        self.util.debug(f'predicting arousal for {split_selection} samples')
        # the store name is built from all configured database names
        databases = ast.literal_eval(glob_conf.config['DATA']['databases'])
        store_name = "_".join(databases)
        self.feature_extractor = FeatureExtractor(
            self.df, ['auddim'], store_name, split_selection
        )
        dimensions = self.feature_extractor.extract()
        scaled = dimensions.arousal * 1000
        labelled = self.df.copy()
        # integer cast cuts off everything beyond the third decimal place
        labelled['arousal_pred'] = scaled.astype('int') / 1000
        return labelled
|
30
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
"""
|
2
|
+
A predictor for emotional dominance.
|
3
|
+
Currently based on audEERING's emotional dimension model.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from nkululeko.util import Util
|
7
|
+
from nkululeko.feature_extractor import FeatureExtractor
|
8
|
+
import ast
|
9
|
+
import nkululeko.glob_conf as glob_conf
|
10
|
+
class DominancePredictor:
    """Adds an automatically predicted dominance column to a dataframe.

    The values are taken from the ``dominance`` column delivered by the
    ``auddim`` feature extractor (audEERING emotional dimension model).
    """

    def __init__(self, df):
        """Remember the dataframe to be labelled and create a logger."""
        self.df = df
        self.util = Util('dominancePredictor')

    def predict(self, split_selection):
        """Return a copy of the dataframe with an added ``dominance_pred`` column.

        Args:
            split_selection: designation of the sample selection; it is
                logged and handed on to the feature extractor.
        """
        self.util.debug(f'predicting dominance for {split_selection} samples')
        # the store name is built from all configured database names
        db_names = ast.literal_eval(glob_conf.config['DATA']['databases'])
        self.feature_extractor = FeatureExtractor(
            self.df, ['auddim'], "_".join(db_names), split_selection
        )
        extracted = self.feature_extractor.extract()
        times_thousand = extracted.dominance * 1000
        result = self.df.copy()
        # integer cast cuts off everything beyond the third decimal place
        result['dominance_pred'] = times_thousand.astype('int') / 1000
        return result
|
nkululeko/ap_gender.py
CHANGED
@@ -22,10 +22,8 @@ class GenderPredictor:
|
|
22
22
|
feats_name = "_".join(ast.literal_eval(glob_conf.config['DATA']['databases']))
|
23
23
|
self.feature_extractor = FeatureExtractor(self.df, ['agender_agender'], feats_name, split_selection)
|
24
24
|
agender_df = self.feature_extractor.extract()
|
25
|
-
pred_age = agender_df.age * 100
|
26
25
|
pred_gender = agender_df.drop('age', axis=1).idxmax(axis=1)
|
27
26
|
return_df = self.df.copy()
|
28
27
|
return_df['gender_pred'] = pred_gender
|
29
|
-
# return_df['age_pred'] = pred_age.astype('int')
|
30
28
|
return return_df
|
31
29
|
|
nkululeko/ap_mos.py
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
""""
|
2
|
+
A predictor for MOS - mean opinion score.
|
3
|
+
"""
|
4
|
+
from nkululeko.util import Util
|
5
|
+
import ast
|
6
|
+
import nkululeko.glob_conf as glob_conf
|
7
|
+
from nkululeko.feature_extractor import FeatureExtractor
|
8
|
+
import numpy as np
|
9
|
+
|
10
|
+
|
11
|
+
class MOSPredictor:
    """Adds an automatically estimated MOS (mean opinion score) column.

    The values are taken from the ``mos`` column delivered by the ``mos``
    feature extractor.
    """

    def __init__(self, df):
        """Remember the dataframe to be labelled and create a logger."""
        self.df = df
        self.util = Util('mosPredictor')

    def predict(self, split_selection):
        """Return a copy of the dataframe with an added ``mos_pred`` column.

        Args:
            split_selection: designation of the sample selection; it is
                logged and handed on to the feature extractor.

        Returns:
            A copy of the input dataframe; ``mos_pred`` holds the estimate
            cut off after the second decimal place. Missing or infinite
            estimates are reported as 0.
        """
        self.util.debug(f'estimating MOS for {split_selection} samples')
        return_df = self.df.copy()
        feats_name = "_".join(ast.literal_eval(glob_conf.config['DATA']['databases']))
        self.feature_extractor = FeatureExtractor(self.df, ['mos'], feats_name, split_selection)
        result_df = self.feature_extractor.extract()
        # replace missing and infinite values by 0, otherwise the integer
        # cast below would fail (fillna already covers NaN, so one pass is enough)
        result_df = result_df.fillna(0).replace([np.inf, -np.inf], 0)
        pred_mos = result_df.mos * 100
        return_df['mos_pred'] = pred_mos.astype('int') / 100
        return return_df
|
35
|
+
|
nkululeko/ap_pesq.py
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
""""
|
2
|
+
A predictor for PESQ - Perceptual Evaluation of Speech Quality.
|
3
|
+
"""
|
4
|
+
from nkululeko.util import Util
|
5
|
+
import ast
|
6
|
+
import nkululeko.glob_conf as glob_conf
|
7
|
+
from nkululeko.feature_extractor import FeatureExtractor
|
8
|
+
import numpy as np
|
9
|
+
|
10
|
+
|
11
|
+
class PESQPredictor:
    """Adds an automatically estimated PESQ column to a dataframe.

    PESQ stands for Perceptual Evaluation of Speech Quality. The values are
    taken from the ``pesq`` column delivered by the ``pesq`` feature
    extractor.
    """

    def __init__(self, df):
        """Remember the dataframe to be labelled and create a logger."""
        self.df = df
        self.util = Util('pesqPredictor')

    def predict(self, split_selection):
        """Return a copy of the dataframe with an added ``pesq_pred`` column.

        Args:
            split_selection: designation of the sample selection; it is
                logged and handed on to the feature extractor.

        Returns:
            A copy of the input dataframe; ``pesq_pred`` holds the estimate
            cut off after the second decimal place. Missing or infinite
            estimates are reported as 0.
        """
        self.util.debug(f'estimating PESQ for {split_selection} samples')
        return_df = self.df.copy()
        feats_name = "_".join(ast.literal_eval(glob_conf.config['DATA']['databases']))
        self.feature_extractor = FeatureExtractor(self.df, ['pesq'], feats_name, split_selection)
        result_df = self.feature_extractor.extract()
        # replace missing and infinite values by 0, otherwise the integer
        # cast below would fail (fillna already covers NaN, so one pass is enough)
        result_df = result_df.fillna(0).replace([np.inf, -np.inf], 0)
        pred_vals = result_df.pesq * 100
        return_df['pesq_pred'] = pred_vals.astype('int') / 100
        return return_df
|
35
|
+
|
nkululeko/ap_snr.py
CHANGED
@@ -30,6 +30,6 @@ class SNRPredictor:
|
|
30
30
|
result_df = result_df.replace(np.nan, 0)
|
31
31
|
result_df.replace([np.inf, -np.inf], 0, inplace=True)
|
32
32
|
pred_snr = result_df.snr * 100
|
33
|
-
return_df['
|
33
|
+
return_df['snr_pred'] = pred_snr.astype('int')/100
|
34
34
|
return return_df
|
35
35
|
|
nkululeko/ap_valence.py
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
""""
|
2
|
+
A predictor for emotional valence.
|
3
|
+
Currently based on audEERING's emotional dimension model.
|
4
|
+
"""
|
5
|
+
from nkululeko.util import Util
|
6
|
+
from nkululeko.feature_extractor import FeatureExtractor
|
7
|
+
import ast
|
8
|
+
import nkululeko.glob_conf as glob_conf
|
9
|
+
class ValencePredictor:
    """Adds an automatically predicted valence column to a dataframe.

    The values are taken from the ``valence`` column delivered by the
    ``auddim`` feature extractor (audEERING emotional dimension model).
    """

    def __init__(self, df):
        """Remember the dataframe to be labelled and create a logger."""
        self.df = df
        self.util = Util('valencePredictor')

    def predict(self, split_selection):
        """Return a copy of the dataframe with an added ``valence_pred`` column.

        Args:
            split_selection: designation of the sample selection; it is
                logged and handed on to the feature extractor.
        """
        self.util.debug(f'predicting valence for {split_selection} samples')
        # the store name is built from all configured database names
        configured_dbs = ast.literal_eval(glob_conf.config['DATA']['databases'])
        joined_name = "_".join(configured_dbs)
        self.feature_extractor = FeatureExtractor(
            self.df, ['auddim'], joined_name, split_selection
        )
        model_output = self.feature_extractor.extract()
        scaled_valence = model_output.valence * 1000
        annotated = self.df.copy()
        # integer cast cuts off everything beyond the third decimal place
        annotated['valence_pred'] = scaled_valence.astype('int') / 1000
        return annotated
|
30
|
+
|
nkululeko/constants.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '0.
|
1
|
+
VERSION = '0.58.0'
|
nkululeko/dataset.py
CHANGED
@@ -67,7 +67,6 @@ class Dataset:
|
|
67
67
|
# store the dataframe
|
68
68
|
store = self.util.get_path('store')
|
69
69
|
store_file = f'{store}{self.name}.pkl'
|
70
|
-
self.util.debug(f'{self.name}: loading ...')
|
71
70
|
self.root = self._load_db()
|
72
71
|
# self.got_speaker, self.got_gender = False, False
|
73
72
|
if not self.start_fresh and os.path.isfile(store_file):
|
nkululeko/experiment.py
CHANGED
@@ -82,7 +82,8 @@ class Experiment:
|
|
82
82
|
self.got_speaker = True
|
83
83
|
self.datasets.update({d: data})
|
84
84
|
self.target = self.util.config_val('DATA', 'target', 'emotion')
|
85
|
-
|
85
|
+
dbs = ','.join(list(self.datasets.keys()))
|
86
|
+
self.util.debug(f'loaded databases {dbs}')
|
86
87
|
|
87
88
|
def _import_csv(self, storage):
|
88
89
|
# df = pd.read_csv(storage, header=0, index_col=[0,1,2])
|
@@ -345,14 +346,36 @@ class Experiment:
|
|
345
346
|
from nkululeko.ap_gender import GenderPredictor
|
346
347
|
predictor = GenderPredictor(df)
|
347
348
|
df = predictor.predict(sample_selection)
|
348
|
-
|
349
|
+
elif target == 'age':
|
349
350
|
from nkululeko.ap_age import AgePredictor
|
350
351
|
predictor = AgePredictor(df)
|
351
352
|
df = predictor.predict(sample_selection)
|
352
|
-
|
353
|
+
elif target == 'snr':
|
353
354
|
from nkululeko.ap_snr import SNRPredictor
|
354
355
|
predictor = SNRPredictor(df)
|
355
356
|
df = predictor.predict(sample_selection)
|
357
|
+
elif target == 'mos':
|
358
|
+
from nkululeko.ap_mos import MOSPredictor
|
359
|
+
predictor = MOSPredictor(df)
|
360
|
+
df = predictor.predict(sample_selection)
|
361
|
+
elif target == 'pesq':
|
362
|
+
from nkululeko.ap_pesq import PESQPredictor
|
363
|
+
predictor = PESQPredictor(df)
|
364
|
+
df = predictor.predict(sample_selection)
|
365
|
+
elif target == 'arousal':
|
366
|
+
from nkululeko.ap_arousal import ArousalPredictor
|
367
|
+
predictor = ArousalPredictor(df)
|
368
|
+
df = predictor.predict(sample_selection)
|
369
|
+
elif target == 'valence':
|
370
|
+
from nkululeko.ap_valence import ValencePredictor
|
371
|
+
predictor = ValencePredictor(df)
|
372
|
+
df = predictor.predict(sample_selection)
|
373
|
+
elif target == 'dominance':
|
374
|
+
from nkululeko.ap_dominance import DominancePredictor
|
375
|
+
predictor = DominancePredictor(df)
|
376
|
+
df = predictor.predict(sample_selection)
|
377
|
+
else:
|
378
|
+
self.util.error(f'unknown auto predict target: {target}')
|
356
379
|
return df
|
357
380
|
|
358
381
|
def random_splice(self):
|
nkululeko/feats_mos.py
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
""" feats_mos.py
|
2
|
+
predict MOS (mean opinion score)
|
3
|
+
|
4
|
+
adapted from
|
5
|
+
https://pytorch.org/audio/main/tutorials/squim_tutorial.html#sphx-glr-tutorials-squim-tutorial-py
|
6
|
+
paper: https://arxiv.org/pdf/2304.01448.pdf
|
7
|
+
|
8
|
+
needs
|
9
|
+
pip uninstall -y torch torchvision torchaudio
|
10
|
+
pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
|
11
|
+
|
12
|
+
"""
|
13
|
+
from nkululeko.util import Util
|
14
|
+
from nkululeko.featureset import Featureset
|
15
|
+
import os
|
16
|
+
import pandas as pd
|
17
|
+
import os
|
18
|
+
import nkululeko.glob_conf as glob_conf
|
19
|
+
import audiofile
|
20
|
+
import torch
|
21
|
+
import torchaudio
|
22
|
+
from torchaudio.pipelines import SQUIM_SUBJECTIVE
|
23
|
+
from torchaudio.utils import download_asset
|
24
|
+
|
25
|
+
class MOSSet(Featureset):
    """Feature set that estimates one MOS (mean opinion score) value per segment.

    The estimate comes from torchaudio's ``SQUIM_SUBJECTIVE`` pipeline, which
    is called with the speech segment and a reference recording fetched via
    ``download_asset``.
    """

    def __init__(self, name, data_df):
        """Constructor.

        Args:
            name: name of the feature set; used to build the store file name.
            data_df: dataframe whose index entries are unpacked as
                (file, start, end) during extraction.
        """
        super().__init__(name, data_df)
        self.device = self.util.config_val('MODEL', 'device', 'cpu')
        self.model_initialized = False

    def init_model(self):
        """Load the MOS model and the reference waveform."""
        self.util.debug('loading MOS model...')
        self.subjective_model = SQUIM_SUBJECTIVE.get_model()
        nmr_speech = download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
        # the sample rate of the reference recording is not needed
        self.WAVEFORM_NMR, _ = torchaudio.load(nmr_speech)
        self.model_initialized = True

    def extract(self):
        """Extract the features or load them from disk if present."""
        store = self.util.get_path('store')
        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
        storage = f'{store}{self.name}.{store_format}'
        extract = self.util.config_val('FEATS', 'needs_feature_extraction', False)
        # NOTE(review): eval() executes whatever the configuration contains;
        # only acceptable as long as the INI file is trusted input.
        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
        if extract or no_reuse or not os.path.isfile(storage):
            if not self.model_initialized:
                self.init_model()
            self.util.debug('predicting MOS, this might take a while...')
            length = len(self.data_df.index)
            # collect the values in index order; a plain list avoids integer
            # keys on a Series that is labelled by (file, start, end)
            mos_values = []
            for idx, (file, start, end) in enumerate(self.data_df.index.to_list()):
                signal, sampling_rate = audiofile.read(
                    file,
                    offset=start.total_seconds(),
                    duration=(end - start).total_seconds(),
                    always_2d=True,
                )
                mos_values.append(self.get_embeddings(signal, sampling_rate))
                if idx % 10 == 0:
                    self.util.debug(f'MOS: {idx} of {length} done')
            self.df = pd.DataFrame({'mos': mos_values}, index=self.data_df.index)
            self.util.write_store(self.df, storage, store_format)
            try:
                glob_conf.config['DATA']['needs_feature_extraction'] = 'false'
            except KeyError:
                pass
        else:
            self.util.debug('reusing predicted MOS values')
            self.df = self.util.get_store(storage, store_format)
            if self.df.isnull().values.any():
                # names of the columns that contain missing values
                nan_columns = self.df.columns[self.df.isna().any()].tolist()
                print(nan_columns)
                self.util.error(f'got nan: {self.df.shape} {self.df.isnull().sum().sum()}')

    def get_embeddings(self, signal, sampling_rate):
        """Return the MOS estimate for one signal as a float.

        The signal is written to a temporary wav file in the current working
        directory and read back with torchaudio before it is passed to the
        model.
        """
        # NOTE(review): no resampling happens here - confirm that the input
        # sampling rate matches what the SQUIM model expects.
        tmp_audio_name = 'mos_audio_tmp.wav'
        audiofile.write(tmp_audio_name, signal, sampling_rate)
        waveform_speech, _ = torchaudio.load(tmp_audio_name)
        with torch.no_grad():
            mos = self.subjective_model(waveform_speech, self.WAVEFORM_NMR)
        return float(mos[0].numpy())

    def extract_sample(self, signal, sr):
        """Return the MOS estimate for a single signal."""
        # load the model only once instead of on every call
        if not self.model_initialized:
            self.init_model()
        feats = self.get_embeddings(signal, sr)
        return feats
|
nkululeko/feats_pesq.py
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
""" feats_pesq.py
|
2
|
+
predict PESQ (Perceptual Evaluation of Speech Quality)
|
3
|
+
|
4
|
+
adapted from
|
5
|
+
https://pytorch.org/audio/main/tutorials/squim_tutorial.html#sphx-glr-tutorials-squim-tutorial-py
|
6
|
+
paper: https://arxiv.org/pdf/2304.01448.pdf
|
7
|
+
|
8
|
+
needs
|
9
|
+
pip uninstall -y torch torchvision torchaudio
|
10
|
+
pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
|
11
|
+
|
12
|
+
"""
|
13
|
+
from nkululeko.util import Util
|
14
|
+
from nkululeko.featureset import Featureset
|
15
|
+
import os
|
16
|
+
import pandas as pd
|
17
|
+
import os
|
18
|
+
import nkululeko.glob_conf as glob_conf
|
19
|
+
import audiofile
|
20
|
+
import torch
|
21
|
+
import torchaudio
|
22
|
+
from torchaudio.pipelines import SQUIM_OBJECTIVE
|
23
|
+
|
24
|
+
class PESQSet(Featureset):
    """Feature set that estimates one PESQ value per segment.

    PESQ stands for Perceptual Evaluation of Speech Quality. The estimate is
    the second of the three values returned by torchaudio's
    ``SQUIM_OBJECTIVE`` pipeline.
    """

    def __init__(self, name, data_df):
        """Constructor.

        Args:
            name: name of the feature set; used to build the store file name.
            data_df: dataframe whose index entries are unpacked as
                (file, start, end) during extraction.
        """
        super().__init__(name, data_df)
        self.device = self.util.config_val('MODEL', 'device', 'cpu')
        self.model_initialized = False

    def init_model(self):
        """Load the objective speech quality model."""
        self.util.debug('loading model...')
        self.objective_model = SQUIM_OBJECTIVE.get_model()
        self.model_initialized = True

    def extract(self):
        """Extract the features or load them from disk if present."""
        store = self.util.get_path('store')
        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
        storage = f'{store}{self.name}.{store_format}'
        extract = self.util.config_val('FEATS', 'needs_feature_extraction', False)
        # NOTE(review): eval() executes whatever the configuration contains;
        # only acceptable as long as the INI file is trusted input.
        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
        if extract or no_reuse or not os.path.isfile(storage):
            if not self.model_initialized:
                self.init_model()
            self.util.debug('predicting PESQ, this might take a while...')
            length = len(self.data_df.index)
            # collect the values in index order; a plain list avoids integer
            # keys on a Series that is labelled by (file, start, end)
            pesq_values = []
            for idx, (file, start, end) in enumerate(self.data_df.index.to_list()):
                signal, sampling_rate = audiofile.read(
                    file,
                    offset=start.total_seconds(),
                    duration=(end - start).total_seconds(),
                    always_2d=True,
                )
                pesq_values.append(self.get_embeddings(signal, sampling_rate))
                if idx % 10 == 0:
                    self.util.debug(f'PESQ: {idx} of {length} done')
            self.df = pd.DataFrame({'pesq': pesq_values}, index=self.data_df.index)
            self.util.write_store(self.df, storage, store_format)
            try:
                glob_conf.config['DATA']['needs_feature_extraction'] = 'false'
            except KeyError:
                pass
        else:
            self.util.debug('reusing predicted PESQ values')
            self.df = self.util.get_store(storage, store_format)
            if self.df.isnull().values.any():
                # names of the columns that contain missing values
                nan_columns = self.df.columns[self.df.isna().any()].tolist()
                print(nan_columns)
                self.util.error(f'got nan: {self.df.shape} {self.df.isnull().sum().sum()}')

    def get_embeddings(self, signal, sampling_rate):
        """Return the PESQ estimate for one signal as a float.

        The signal is written to a temporary wav file in the current working
        directory and read back with torchaudio before it is passed to the
        model.
        """
        # NOTE(review): no resampling happens here - confirm that the input
        # sampling rate matches what the SQUIM model expects.
        tmp_audio_name = 'pesq_audio_tmp.wav'
        audiofile.write(tmp_audio_name, signal, sampling_rate)
        waveform_speech, _ = torchaudio.load(tmp_audio_name)
        with torch.no_grad():
            # the model returns three estimates; only the second one is used
            _, pesq_hyp, _ = self.objective_model(waveform_speech)
        return float(pesq_hyp[0].numpy())

    def extract_sample(self, signal, sr):
        """Return the PESQ estimate for a single signal."""
        # load the model only once instead of on every call
        if not self.model_initialized:
            self.init_model()
        feats = self.get_embeddings(signal, sr)
        return feats
|
nkululeko/feature_extractor.py
CHANGED
@@ -61,6 +61,12 @@ class FeatureExtractor:
|
|
61
61
|
elif feats_type=='snr':
|
62
62
|
from nkululeko.feats_snr import SNRSet
|
63
63
|
self.featExtractor = SNRSet(f'{store_name}_{self.feats_designation}', self.data_df)
|
64
|
+
elif feats_type=='mos':
|
65
|
+
from nkululeko.feats_mos import MOSSet
|
66
|
+
self.featExtractor = MOSSet(f'{store_name}_{self.feats_designation}', self.data_df)
|
67
|
+
elif feats_type=='pesq':
|
68
|
+
from nkululeko.feats_pesq import PESQSet
|
69
|
+
self.featExtractor = PESQSet(f'{store_name}_{self.feats_designation}', self.data_df)
|
64
70
|
elif feats_type=='clap':
|
65
71
|
from nkululeko.feats_clap import Clap
|
66
72
|
self.featExtractor = Clap(f'{store_name}_{self.feats_designation}', self.data_df)
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# predict.py
|
2
2
|
# use some model and add automatically predicted labels to train and test splits, then save as a new dataset
|
3
3
|
|
4
4
|
from nkululeko.experiment import Experiment
|
@@ -27,7 +27,7 @@ def main(src_dir):
|
|
27
27
|
config.read(config_file)
|
28
28
|
# create a new experiment
|
29
29
|
expr = Experiment(config)
|
30
|
-
util = Util('
|
30
|
+
util = Util('predict')
|
31
31
|
util.debug(f'running {expr.name} from config {config_file}, nkululeko version {VERSION}')
|
32
32
|
|
33
33
|
# load the data
|
@@ -39,7 +39,11 @@ def main(src_dir):
|
|
39
39
|
|
40
40
|
# process the data
|
41
41
|
df = expr.autopredict()
|
42
|
-
|
42
|
+
target = util.config_val('DATA', 'target', 'emotion')
|
43
|
+
if 'class_label' in df.columns:
|
44
|
+
df = df.drop(columns=[target])
|
45
|
+
df = df.rename(columns={'class_label':target})
|
46
|
+
name = util.get_data_name()+'_predicted'
|
43
47
|
df.to_csv(f'{expr.data_dir}/{name}.csv')
|
44
48
|
util.debug(f'saved {name}.csv to {expr.data_dir}')
|
45
49
|
print('DONE')
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.58.0
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -42,6 +42,7 @@ Requires-Dist: xgboost
|
|
42
42
|
# Nkululeko
|
43
43
|
* [Overview](#overview)
|
44
44
|
* [Installation](#installation)
|
45
|
+
* [Documentation](https://nkululeko.readthedocs.io)
|
45
46
|
* [Usage](#usage)
|
46
47
|
* [Hello World](#hello-world-example)
|
47
48
|
* [Licence](#licence)
|
@@ -91,7 +92,11 @@ Sometimes you only want to take a look at your data:
|
|
91
92
|
|
92
93
|
<img src="meta/images/data_plot.png" width="500px"/>
|
93
94
|
|
94
|
-
|
95
|
+
|
96
|
+
## Documentation
|
97
|
+
The documentation, along with extensions of installation, usage, INI file format, and examples, can be found at [nkululeko.readthedocs.io](https://nkululeko.readthedocs.io).
|
98
|
+
|
99
|
+
## Installation
|
95
100
|
|
96
101
|
Create and activate a virtual Python environment and simply run
|
97
102
|
```
|
@@ -131,10 +136,11 @@ Read the [Hello World example](#hello-world-example) for initial usage with Emo-
|
|
131
136
|
|
132
137
|
Here is an overview of the interfaces:
|
133
138
|
* **nkululeko.nkululeko**: doing experiments
|
134
|
-
* **nkululeko.demo**: demo the current best model on command line
|
139
|
+
* **nkululeko.demo**: demo the current best model on the command line
|
135
140
|
* **nkululeko.test**: predict a series of files with the current best model
|
136
141
|
* **nkululeko.explore**: perform data exploration
|
137
142
|
* **nkululeko.augment**: augment the current training data
|
143
|
+
* **nkululeko.predict**: predict a series of files with a given model
|
138
144
|
|
139
145
|
Alternatively, there is a central "experiment" class that can be used by own experiments
|
140
146
|
|
@@ -242,6 +248,19 @@ Nkululeko can be used under the [MIT license](https://choosealicense.com/license
|
|
242
248
|
Changelog
|
243
249
|
=========
|
244
250
|
|
251
|
+
Version 0.58.0
|
252
|
+
--------------
|
253
|
+
* added dominance predict
|
254
|
+
* added MOS predict
|
255
|
+
* added PESQ predict
|
256
|
+
|
257
|
+
Version 0.57.0
|
258
|
+
--------------
|
259
|
+
* renamed autopredict to predict
|
260
|
+
* added arousal autopredict
|
261
|
+
* added valence autopredict
|
262
|
+
|
263
|
+
|
245
264
|
Version 0.56.0
|
246
265
|
--------------
|
247
266
|
* added autopredict module
|
@@ -1,20 +1,24 @@
|
|
1
1
|
nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
2
2
|
nkululeko/ap_age.py,sha256=ZpWtNpfqseK2JwD2ZeRXdEtXm9EKmaeYwdQ-r-e0FcM,1054
|
3
|
-
nkululeko/
|
4
|
-
nkululeko/
|
3
|
+
nkululeko/ap_arousal.py,sha256=mYVHBdX4wmAjEZ9DvKBiVONJuDiiGN0Zn8HintV6OUQ,997
|
4
|
+
nkululeko/ap_dominance.py,sha256=gwsQ8OMSTgY9oG26L9wk4qfgGjR_gyo66yiDNQ9PY5I,1006
|
5
|
+
nkululeko/ap_gender.py,sha256=MfySaQNQEpV9GZCJm1vas_SIcXrCh7O5Q4X6AXVcQDE,983
|
6
|
+
nkululeko/ap_mos.py,sha256=GByXFm4IpHDyxUHqF5hmqnZbX4LfdH2quHjxzcrlzg0,1079
|
7
|
+
nkululeko/ap_pesq.py,sha256=DapnhAST1YHOkXptxoqvbFAti8rh2jvsR7HIGWxshZA,1111
|
8
|
+
nkululeko/ap_snr.py,sha256=X5Pnl9PDOw-w1cJSmzKaPtCMEiQhko6pWd0YAGeGYjQ,1082
|
9
|
+
nkululeko/ap_valence.py,sha256=cyFrKRy68EU5gDiVg0_HxiwaKGbAJ9UuwuiXgVTalAU,997
|
5
10
|
nkululeko/augment.py,sha256=8YVGm4kJWynrMknKc0okveKfHtv8KrnkwWZm60-ZcHo,1630
|
6
11
|
nkululeko/augmenter.py,sha256=nOJPb7Ut4wLlEcla1JaUiLntksrZqfvq1mFOCexpT7Y,2584
|
7
|
-
nkululeko/autopredict.py,sha256=3YQPTyVtaFt0DQmop5Q3F-FvyA_4XRF3XyO3C_4v0vo,1675
|
8
12
|
nkululeko/balancer.py,sha256=64ftZN68sMDfkvuovCDHpAHmSJgCO6Kdk9bwmpSisec,12
|
9
13
|
nkululeko/cacheddataset.py,sha256=bSJ_SDg7TxL89YL_pJXp-sFvdUXJtHuBTd5KSTE4AkQ,955
|
10
|
-
nkululeko/constants.py,sha256=
|
11
|
-
nkululeko/dataset.py,sha256=
|
14
|
+
nkululeko/constants.py,sha256=NVd5ylf2ISBL-XTr0fu2sJrttOdnSTrRPQrim34mGTM,18
|
15
|
+
nkululeko/dataset.py,sha256=pI4UYyR-H5ZeZBGXZqIOh1vGzPvzfp6yQx_cpQ-JIn8,20458
|
12
16
|
nkululeko/dataset_csv.py,sha256=4Pz_sjF_LJBd5mYDlnfY50ogksQ9j3XBczlI_rQrJ0Y,2110
|
13
17
|
nkululeko/dataset_ravdess.py,sha256=pTt98sr4_egdUCv2fWepkZTlkQ6x3A2YshO_n302DNg,537
|
14
18
|
nkululeko/demo.py,sha256=nGP3fUDXuW1ZF12AzMpzRWXct0rdqYRJVNgA9B_QWwA,1821
|
15
19
|
nkululeko/demo_predictor.py,sha256=VVxE2lf5lTkAP5qElG5U2bK6SdDzQ2Jmf0Vn_yHpSro,2302
|
16
20
|
nkululeko/estimate_snr.py,sha256=m3JZSRGXr2M9yVgovTSZ63rpho9hIUfHOXfAVRAHf1k,4186
|
17
|
-
nkululeko/experiment.py,sha256
|
21
|
+
nkululeko/experiment.py,sha256=-W3hYNs3vPMVcBUlKbZOFmQ8gsLao-VI_IvFg5Igd_k,24480
|
18
22
|
nkululeko/explore.py,sha256=CYh9-46yUGletO7p6xj_yx0zrYJ7FoqlQg_JbVY0TD8,1969
|
19
23
|
nkululeko/feats_agender.py,sha256=46DmyXVHKL7wH77j1W82ualUmPic93t8OAIL5q_yMXI,2750
|
20
24
|
nkululeko/feats_agender_agender.py,sha256=Vwt3fynWHwmTMJtHu5ltDQhtW_VO3ImKqwGMNoxhIyI,2832
|
@@ -24,13 +28,15 @@ nkululeko/feats_audmodel_dim.py,sha256=Pd_LXvjJTGb3MAuXiFo0_50BABJnm3IdxUVkRYLEY
|
|
24
28
|
nkululeko/feats_clap.py,sha256=CHOkNzdMNK5vDld69EsBeBTkyxn_hY1d3LyU2lO3mR0,3250
|
25
29
|
nkululeko/feats_import.py,sha256=Gm-svzwNY8Qvdv91VjgjmAudroAi32v2gCC56USJtKA,2045
|
26
30
|
nkululeko/feats_mld.py,sha256=6ErExKzDpfwoLwzBBNMeIZ2G5D2ovHA7UvKRqVdF-bo,1909
|
31
|
+
nkululeko/feats_mos.py,sha256=xp51EWetc7d4pMrUP3z0j_9hErDNLMo5GetNDiItOMA,3887
|
27
32
|
nkululeko/feats_opensmile.py,sha256=PjnG0P29-yGmaMmTm8fitWm5AUOj8yEbPIT6FHQ8pPk,3786
|
28
33
|
nkululeko/feats_oxbow.py,sha256=lZLPXcUk7IcexM3O5kw-ekkYodfXE6QpS2QSHU1D6F8,4425
|
34
|
+
nkululeko/feats_pesq.py,sha256=yVNvaMDEV4CO_sqhndgXRa8K7kCi2nANFMv_816krzk,3733
|
29
35
|
nkululeko/feats_praat.py,sha256=BbjgpIO-dJ2GuVxQdaTtu8rRuaJ0dmFm9F3g_z_5SCA,3024
|
30
36
|
nkululeko/feats_snr.py,sha256=BOhO0kDxyfnSTLA6gxSYilNkRGiXhamShZJNron8Z6g,2605
|
31
37
|
nkululeko/feats_trill.py,sha256=dydoLTVZnWeps_Ur0xWNhGdAw3PxA9yFCg6exTi2zhs,2934
|
32
38
|
nkululeko/feats_wav2vec2.py,sha256=S63QmTiGlyEalO4HjacAh9KxOAVO80vZntG-Alk91WU,4005
|
33
|
-
nkululeko/feature_extractor.py,sha256=
|
39
|
+
nkululeko/feature_extractor.py,sha256=1ZAYMEZThkrcEL07ObqWWkcUsd7da_Ij2z12ozN4gyM,5111
|
34
40
|
nkululeko/featureset.py,sha256=8zYafHU_nDYV89AukfNobltCxBuoIvvRIGEXPm0PCI0,1331
|
35
41
|
nkululeko/feinberg_praat.py,sha256=fUrdVo1aJNitdG1L0sZRfIvGMJJdQwG3BXoAV5xjh5o,19472
|
36
42
|
nkululeko/file_checker.py,sha256=Nw05SIp7Ez1U9ZeFhNGz0XivwKr43hHg1WsfzKsrFPQ,3510
|
@@ -55,6 +61,7 @@ nkululeko/model_xgr.py,sha256=rOJ3QZjTLIpxHh6GqyCo-ewG9IUL35Fd-0jRDNuM_bk,242
|
|
55
61
|
nkululeko/modelrunner.py,sha256=IKvPcrvJS0voUfoFAyNAGlEhONZVb70ZEBH79hwWh_I,5380
|
56
62
|
nkululeko/nkululeko.py,sha256=lxFPzHBWTU8t7wOqYYyiJ5jzVS-AG6Dq2qsS_K7Qinw,1551
|
57
63
|
nkululeko/plots.py,sha256=Js9pBBqcQSc1zekDF0gdARHn7mRhLuBnTVkH5HW2VCU,10918
|
64
|
+
nkululeko/predict.py,sha256=3ei4wn2by0p9Vkv7cllMcszmEjSM2vX0T6x_5rlgT28,1851
|
58
65
|
nkululeko/randomsplicer.py,sha256=ZILsLHabKWbBB14l2UT9d0-b4lyuHkvvl9osHklbsoY,2674
|
59
66
|
nkululeko/randomsplicing.py,sha256=MOLwxFTM0omsVBmKAN82PmGkD4zNnxwDYoWS4WQnuVU,1867
|
60
67
|
nkululeko/reporter.py,sha256=359aeQWt0ZGLseaJnOfafYG8BrwumiM2Q58DWiaoyWQ,10177
|
@@ -66,8 +73,8 @@ nkululeko/syllable_nuclei.py,sha256=vK9dj5deqRnyEmlZmhFtKPzqKVGNCgTnWaG8UDITKNg,
|
|
66
73
|
nkululeko/test.py,sha256=BbHGliDChAXqMe2oA579dJpyZSlPGAm5997lX_POboQ,1372
|
67
74
|
nkululeko/test_predictor.py,sha256=7hbUhF09YqI7ixMp3gtVVnAO3prLF3J5tHH9mv125pM,2405
|
68
75
|
nkululeko/util.py,sha256=gZrNTF4C7hKkEMCC_hoNkEAhAViWzWebP8LsHRew7s4,9731
|
69
|
-
nkululeko-0.
|
70
|
-
nkululeko-0.
|
71
|
-
nkululeko-0.
|
72
|
-
nkululeko-0.
|
73
|
-
nkululeko-0.
|
76
|
+
nkululeko-0.58.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
77
|
+
nkululeko-0.58.0.dist-info/METADATA,sha256=oXjiP4gxGgEe8GsPo_a9nO4-VoUJrd4DLN7o11trm90,20729
|
78
|
+
nkululeko-0.58.0.dist-info/WHEEL,sha256=5sUXSg9e4bi7lTLOHcm6QEYwO5TIF1TNbTSVFVjcJcc,92
|
79
|
+
nkululeko-0.58.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
80
|
+
nkululeko-0.58.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|