nkululeko 0.59.1__py3-none-any.whl → 0.61.0__py3-none-any.whl

This diff represents the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that public registry.
Files changed (64)
  1. nkululeko/constants.py +1 -1
  2. nkululeko/experiment.py +43 -43
  3. nkululeko/feature_extractor.py +101 -58
  4. nkululeko/modelrunner.py +14 -14
  5. nkululeko/plots.py +11 -0
  6. nkululeko/segment.py +23 -27
  7. nkululeko/test_predictor.py +1 -1
  8. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/METADATA +13 -1
  9. nkululeko-0.61.0.dist-info/RECORD +31 -0
  10. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/WHEEL +1 -1
  11. nkululeko/ap_age.py +0 -31
  12. nkululeko/ap_arousal.py +0 -30
  13. nkululeko/ap_dominance.py +0 -29
  14. nkululeko/ap_gender.py +0 -29
  15. nkululeko/ap_mos.py +0 -35
  16. nkululeko/ap_pesq.py +0 -35
  17. nkululeko/ap_sdr.py +0 -36
  18. nkululeko/ap_snr.py +0 -35
  19. nkululeko/ap_stoi.py +0 -34
  20. nkululeko/ap_valence.py +0 -30
  21. nkululeko/augmenter.py +0 -64
  22. nkululeko/dataset.py +0 -415
  23. nkululeko/dataset_csv.py +0 -49
  24. nkululeko/dataset_ravdess.py +0 -19
  25. nkululeko/estimate_snr.py +0 -89
  26. nkululeko/feats_agender.py +0 -63
  27. nkululeko/feats_agender_agender.py +0 -65
  28. nkululeko/feats_analyser.py +0 -87
  29. nkululeko/feats_audmodel.py +0 -63
  30. nkululeko/feats_audmodel_dim.py +0 -63
  31. nkululeko/feats_clap.py +0 -74
  32. nkululeko/feats_import.py +0 -44
  33. nkululeko/feats_mld.py +0 -47
  34. nkululeko/feats_mos.py +0 -92
  35. nkululeko/feats_opensmile.py +0 -84
  36. nkululeko/feats_oxbow.py +0 -87
  37. nkululeko/feats_praat.py +0 -72
  38. nkululeko/feats_snr.py +0 -63
  39. nkululeko/feats_squim.py +0 -99
  40. nkululeko/feats_trill.py +0 -74
  41. nkululeko/feats_wav2vec2.py +0 -94
  42. nkululeko/featureset.py +0 -41
  43. nkululeko/feinberg_praat.py +0 -430
  44. nkululeko/loss_ccc.py +0 -28
  45. nkululeko/loss_softf1loss.py +0 -40
  46. nkululeko/model.py +0 -256
  47. nkululeko/model_bayes.py +0 -14
  48. nkululeko/model_cnn.py +0 -118
  49. nkululeko/model_gmm.py +0 -16
  50. nkululeko/model_knn.py +0 -16
  51. nkululeko/model_knn_reg.py +0 -16
  52. nkululeko/model_mlp.py +0 -175
  53. nkululeko/model_mlp_regression.py +0 -197
  54. nkululeko/model_svm.py +0 -18
  55. nkululeko/model_svr.py +0 -18
  56. nkululeko/model_tree.py +0 -14
  57. nkululeko/model_tree_reg.py +0 -14
  58. nkululeko/model_xgb.py +0 -12
  59. nkululeko/model_xgr.py +0 -12
  60. nkululeko/randomsplicer.py +0 -76
  61. nkululeko/randomsplicing.py +0 -74
  62. nkululeko-0.59.1.dist-info/RECORD +0 -82
  63. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/LICENSE +0 -0
  64. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/top_level.txt +0 -0
nkululeko/feats_oxbow.py DELETED
@@ -1,87 +0,0 @@
-# feats_oxbow.py
-
-from nkululeko.util import Util
-from nkululeko.featureset import Featureset
-import os
-import pandas as pd
-import opensmile
-
-class Openxbow(Featureset):
-    """Class to extract openXBOW processed opensmile features (https://github.com/openXBOW)"""
-
-    def __init__(self, name, data_df, is_train = False):
-        """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
-        super().__init__(name, data_df)
-        self.is_train = is_train
-
-    def extract(self):
-        """Extract the features or load them from disk if present."""
-        self.featset = self.util.config_val('FEATS', 'set', 'eGeMAPSv02')
-        self.feature_set = eval(f'opensmile.FeatureSet.{self.featset}')
-        store = self.util.get_path('store')
-        storage = f'{store}{self.name}_{self.featset}.pkl'
-        extract = self.util.config_val('FEATS', 'needs_feature_extraction', False)
-        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
-        if extract or no_reuse or not os.path.isfile(storage):
-            # extract smile features first
-            self.util.debug('extracting openSmile features, this might take a while...')
-            smile = opensmile.Smile(
-                feature_set= self.feature_set,
-                feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
-                num_workers=5,)
-            if isinstance(self.data_df.index, pd.MultiIndex):
-                is_multi_index = True
-                smile_df = smile.process_index(self.data_df.index)
-            else:
-                smile_df = smile.process_files(self.data_df.index)
-                smile_df.index = smile_df.index.droplevel(1)
-                smile_df.index = smile_df.index.droplevel(1)
-            # compute xbow features
-            # set some file names on disk
-            lld_name, xbow_name, codebook_name = 'llds.csv', 'xbow.csv', 'xbow_codebook'
-            # save the smile features
-            smile_df.to_csv(lld_name, sep=';', header=False)
-            # get the path of the xbow java jar file
-            xbow_path = self.util.config_val('FEATS', 'xbow.model', '../openXBOW/')
-            # get the size of the codebook
-            size = self.util.config_val('FEATS', 'size', 500)
-            # get the number of assignements
-            assignments = self.util.config_val('FEATS', 'assignments', 10)
-            # differentiate between train and test
-            if self.is_train:
-                # store the codebook
-                os.system(f'java -jar {xbow_path}openXBOW.jar -i {lld_name} -standardizeInput -log \
-                    -o {xbow_name} -size {size} -a {assignments} -B {codebook_name}')
-            else:
-                # use the codebook
-                os.system(f'java -jar {xbow_path}openXBOW.jar -i {lld_name} \
-                    -o {xbow_name} -b {codebook_name}')
-            # read in the result from disk
-            xbow_df = pd.read_csv(xbow_name, sep=';', header=None)
-            # set the index
-            xbow_df = xbow_df.set_index(self.data_df.index)
-            # check if smile features should be added
-            with_os = self.util.config_val('FEATS', 'with_os', False)
-            if with_os:
-                # extract smile functionals
-                self.util.debug('extracting openSmile functionals, this might take a while...')
-                smile = opensmile.Smile(
-                    feature_set= opensmile.FeatureSet.eGeMAPSv02, # always use eGemaps for this
-                    feature_level=opensmile.FeatureLevel.Functionals,
-                    num_workers=5,)
-                if isinstance(self.data_df.index, pd.MultiIndex):
-                    is_multi_index = True
-                    smile_df = smile.process_index(self.data_df.index)
-                else:
-                    smile_df = smile.process_files(self.data_df.index)
-                    # drop the multi index
-                    smile_df.index = smile_df.index.droplevel(1)
-                    smile_df.index = smile_df.index.droplevel(1)
-                xbow_df = xbow_df.join(smile_df)
-            # in any case, store to disk for later use
-            xbow_df.to_pickle(storage)
-            # and assign to be the "official" feature set
-            self.df = xbow_df
-        else:
-            self.util.debug('reusing extracted OS features.')
-            self.df = pd.read_pickle(storage)
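
The deleted Openxbow extractor wrapped the openXBOW bag-of-audio-words tool in a two-pass scheme: the training split builds and stores a codebook, while the test/dev splits re-apply it. Below is a minimal standalone sketch of that scheme, not part of the package; the jar path, file names, codebook size and number of assignments are illustrative assumptions taken from the defaults above.

# Sketch: build an openXBOW codebook on the training LLDs, then re-apply it to the
# test LLDs, mirroring the is_train switch in the deleted Openxbow class.
import subprocess

XBOW_JAR = "../openXBOW/openXBOW.jar"   # assumed location of the openXBOW jar

def xbow_features(lld_csv, out_csv, codebook, is_train, size=500, assignments=10):
    if is_train:
        # first pass: standardize, quantize and store the codebook (-B)
        cmd = ["java", "-jar", XBOW_JAR, "-i", lld_csv, "-standardizeInput", "-log",
               "-o", out_csv, "-size", str(size), "-a", str(assignments), "-B", codebook]
    else:
        # second pass: reuse the stored codebook (-b) so train and test share one vocabulary
        cmd = ["java", "-jar", XBOW_JAR, "-i", lld_csv, "-o", out_csv, "-b", codebook]
    subprocess.run(cmd, check=True)

# usage: xbow_features("train_llds.csv", "train_xbow.csv", "xbow_codebook", is_train=True)
#        xbow_features("test_llds.csv", "test_xbow.csv", "xbow_codebook", is_train=False)
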
nkululeko/feats_praat.py DELETED
@@ -1,72 +0,0 @@
-# feats_praat.py
-from nkululeko.featureset import Featureset
-import os
-import pandas as pd
-import nkululeko.glob_conf as glob_conf
-from nkululeko import feinberg_praat
-import ast
-
-class Praatset(Featureset):
-    """
-    a feature extractor for the Praat software, based on
-    David R. Feinberg's Praat scripts for the parselmouth python interface.
-    https://osf.io/6dwr3/
-
-    """
-    def __init__(self, name, data_df):
-        super().__init__(name, data_df)
-
-    def extract(self):
-        """Extract the features based on the initialized dataset or re-open them when found on disk."""
-        store = self.util.get_path('store')
-        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
-        storage = f'{store}{self.name}.{store_format}'
-        extract = self.util.config_val('FEATS', 'needs_feature_extraction', False)
-        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
-        if extract or no_reuse or not os.path.isfile(storage):
-            self.util.debug('extracting Praat features, this might take a while...')
-            self.df = feinberg_praat.compute_features(self.data_df.index)
-            self.df = self.df.set_index(self.data_df.index)
-            for i, col in enumerate(self.df.columns):
-                if self.df[col].isnull().values.any():
-                    self.util.debug(f'{col} includes {self.df[col].isnull().sum()} nan, inserting mean values')
-                    self.df[col] = self.df[col].fillna(self.df[col].mean())
-
-            self.util.write_store(self.df, storage, store_format)
-            try:
-                glob_conf.config['DATA']['needs_feature_extraction'] = 'false'
-            except KeyError:
-                pass
-        else:
-            self.util.debug(f'reusing extracted Praat features: {storage}.')
-            self.df = self.util.get_store(storage, store_format)
-        self.util.debug(f'praat feature names: {self.df.columns}')
-        self.df = self.df.astype(float)
-
-
-
-    def extract_sample(self, signal, sr):
-        self.util.error('feats_praat: extracting single samples not implemented yet')
-        feats = None
-        return feats
-
-    def filter(self):
-        # use only the features that are indexed in the target dataframes
-        self.df = self.df[self.df.index.isin(self.data_df.index)]
-        try:
-            # use only some features
-            selected_features = ast.literal_eval(glob_conf.config['FEATS']['praat.features'])
-            self.util.debug(f'selecting features from Praat: {selected_features}')
-            sel_feats_df = pd.DataFrame()
-            hit = False
-            for feat in selected_features:
-                try:
-                    sel_feats_df[feat] = self.df[feat]
-                    hit = True
-                except KeyError:
-                    pass
-            if hit:
-                self.df = sel_feats_df
-                self.util.debug(f'new feats shape after selecting Praat features: {self.df.shape}')
-        except KeyError:
-            pass
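
The only data cleaning the deleted Praatset did was per-column mean imputation of missing values before caching. A small pandas sketch of that step follows; the column names are hypothetical examples, not taken from the package.

# Sketch: replace NaN values in each feature column by the column mean,
# as Praatset.extract() did before storing the features.
import numpy as np
import pandas as pd

df = pd.DataFrame({"meanF0Hz": [120.0, np.nan, 180.0],    # hypothetical Praat features
                   "jitterLocal": [0.01, 0.02, np.nan]})

for col in df.columns:
    if df[col].isnull().values.any():
        print(f"{col}: {df[col].isnull().sum()} NaN values, inserting mean")
        df[col] = df[col].fillna(df[col].mean())

print(df)
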
nkululeko/feats_snr.py DELETED
@@ -1,63 +0,0 @@
-""" feats_snr.py
-Estimate snr (signal to noise ratio as acoustic features)
-"""
-from nkululeko.util import Util
-from nkululeko.featureset import Featureset
-from nkululeko.estimate_snr import SNREstimator
-import os
-import pandas as pd
-import os
-import nkululeko.glob_conf as glob_conf
-import audiofile
-
-class SNRSet(Featureset):
-    """Class to estimate snr"""
-
-    def __init__(self, name, data_df):
-        """Constructor. """
-        super().__init__(name, data_df)
-
-    def extract(self):
-        """Estimate the features or load them from disk if present."""
-        store = self.util.get_path('store')
-        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
-        storage = f'{store}{self.name}.{store_format}'
-        extract = self.util.config_val('FEATS', 'needs_feature_extraction', False)
-        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
-        if extract or no_reuse or not os.path.isfile(storage):
-            self.util.debug('estimating SNR, this might take a while...')
-            snr_series = pd.Series(index = self.data_df.index, dtype=object)
-            for idx, (file, start, end) in enumerate(self.data_df.index.to_list()):
-                signal, sampling_rate = audiofile.read(file, offset=start.total_seconds(), duration=(end-start).total_seconds(), always_2d=True)
-                snr = self.get_snr(signal[0], sampling_rate)
-                snr_series[idx] = snr
-                if idx%10==0:
-                    print('.', end='')
-            print('')
-            self.df = pd.DataFrame(snr_series.values.tolist(), index=self.data_df.index)
-            self.df.columns = ['snr']
-            self.util.write_store(self.df, storage, store_format)
-            try:
-                glob_conf.config['DATA']['needs_feature_extraction'] = 'false'
-            except KeyError:
-                pass
-        else:
-            self.util.debug('reusing estimated SNR values')
-            self.df = self.util.get_store(storage, store_format)
-
-    def get_snr(self, signal, sampling_rate):
-        r"""Estimate SNR from raw audio signal.
-        Args:
-            signal: audio signal
-            sampling_rate: sample rate
-        Returns
-            snr: estimated signal to noise ratio
-        """
-        snr_estimator = SNREstimator(signal, sampling_rate)
-        estimated_snr, log_energies, energy_threshold_low, energy_threshold_high = snr_estimator.estimate_snr()
-        return estimated_snr
-
-    def extract_sample(self, signal, sr):
-        self.init_model()
-        feats = self.get_snr(signal, sr)
-        return feats
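
Both the SNR extractor above and the SQUIM and wav2vec2 extractors below iterate over a segmented index of (file, start, end) tuples and read only the requested slice of each file with audiofile. A small sketch of that pattern; the file names and timestamps are illustrative assumptions.

# Sketch: read only the (start, end) slice of each audio file, as the
# SNRSet.extract() loop did for a segmented (file, start, end) index.
import pandas as pd
import audiofile

segments = [("a.wav", pd.Timedelta("0s"), pd.Timedelta("2s")),        # hypothetical segments
            ("b.wav", pd.Timedelta("1.5s"), pd.Timedelta("3.0s"))]

for file, start, end in segments:
    signal, sampling_rate = audiofile.read(
        file,
        offset=start.total_seconds(),               # skip to the segment start
        duration=(end - start).total_seconds(),     # read only the segment
        always_2d=True,                             # shape (channels, samples)
    )
    print(file, signal.shape, sampling_rate)
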
nkululeko/feats_squim.py DELETED
@@ -1,99 +0,0 @@
-""" feats_squim.py
-predict SQUIM ( SPEECH QUALITY AND INTELLIGIBILITY
-MEASURES) features
-
-
-Wideband Perceptual Estimation of Speech Quality (PESQ) [2]
-Short-Time Objective Intelligibility (STOI) [3]
-Scale-Invariant Signal-to-Distortion Ratio (SI-SDR) [4]
-
-
-adapted from
-from https://pytorch.org/audio/main/tutorials/squim_tutorial.html#sphx-glr-tutorials-squim-tutorial-py
-paper: https://arxiv.org/pdf/2304.01448.pdf
-
-needs
-pip uninstall -y torch torchvision torchaudio
-pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
-
-"""
-from nkululeko.util import Util
-from nkululeko.featureset import Featureset
-import os
-import pandas as pd
-import os
-import nkululeko.glob_conf as glob_conf
-import audiofile
-import torch
-import torchaudio
-from torchaudio.pipelines import SQUIM_OBJECTIVE
-
-class SQUIMSet(Featureset):
-    """Class to predict SQUIM features
-
-    """
-
-    def __init__(self, name, data_df):
-        """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
-        super().__init__(name, data_df)
-        self.device = self.util.config_val('MODEL', 'device', 'cpu')
-        self.model_initialized = False
-
-
-    def init_model(self):
-        # load model
-        self.util.debug('loading model...')
-        self.objective_model = SQUIM_OBJECTIVE.get_model()
-        self.model_initialized = True
-
-
-    def extract(self):
-        """Extract the features or load them from disk if present."""
-        store = self.util.get_path('store')
-        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
-        storage = f'{store}{self.name}.{store_format}'
-        extract = self.util.config_val('FEATS', 'needs_feature_extraction', False)
-        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
-        if extract or no_reuse or not os.path.isfile(storage):
-            if not self.model_initialized:
-                self.init_model()
-            self.util.debug('predicting SQUIM, this might take a while...')
-            emb_series = pd.Series(index = self.data_df.index, dtype=object)
-            length = len(self.data_df.index)
-            for idx, (file, start, end) in enumerate(self.data_df.index.to_list()):
-                signal, sampling_rate = audiofile.read(file, offset=start.total_seconds(), duration=(end-start).total_seconds(), always_2d=True)
-                emb = self.get_embeddings(signal, sampling_rate)
-                emb_series[idx] = emb
-                if idx%10==0:
-                    self.util.debug(f'SQUIM: {idx} of {length} done')
-            self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
-            self.df.columns = ['pesq', 'sdr', 'stoi']
-            self.util.write_store(self.df, storage, store_format)
-            try:
-                glob_conf.config['DATA']['needs_feature_extraction'] = 'false'
-            except KeyError:
-                pass
-        else:
-            self.util.debug('reusing predicted SQUIM values')
-            self.df = self.util.get_store(storage, store_format)
-            if self.df.isnull().values.any():
-                nanrows = self.df.columns[self.df.isna().any()].tolist()
-                print(nanrows)
-                self.util.error(f'got nan: {self.df.shape} {self.df.isnull().sum().sum()}')
-
-
-    def get_embeddings(self, signal, sampling_rate):
-        tmp_audio_name = 'squim_audio_tmp.wav'
-        audiofile.write(tmp_audio_name, signal, sampling_rate)
-        WAVEFORM_SPEECH, SAMPLE_RATE_SPEECH = torchaudio.load(tmp_audio_name)
-        with torch.no_grad():
-            stoi_hyp, pesq_hyp, si_sdr_hyp = self.objective_model(WAVEFORM_SPEECH)
-        pesq = float(pesq_hyp[0].numpy())
-        stoi = float(stoi_hyp[0].numpy())
-        sdr = float(si_sdr_hyp[0].numpy())
-        return pesq, sdr, stoi
-
-    def extract_sample(self, signal, sr):
-        self.init_model()
-        feats = self.get_embeddings(signal, sr)
-        return feats
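
The core of the deleted SQUIM extractor is the torchaudio objective pipeline, which returns STOI, PESQ and SI-SDR estimates for a waveform. A minimal sketch of that call, assuming a mono WAV file; the file path is a placeholder and the resampling step is added here because the pipeline expects 16 kHz input.

# Sketch: predict objective speech-quality metrics with torchaudio's SQUIM pipeline,
# as SQUIMSet.get_embeddings() did. "speech.wav" is a placeholder path.
import torch
import torchaudio
from torchaudio.pipelines import SQUIM_OBJECTIVE

model = SQUIM_OBJECTIVE.get_model()             # downloads the pretrained objective model
waveform, sample_rate = torchaudio.load("speech.wav")
if sample_rate != SQUIM_OBJECTIVE.sample_rate:  # the pipeline works on 16 kHz audio
    waveform = torchaudio.functional.resample(
        waveform, sample_rate, SQUIM_OBJECTIVE.sample_rate)

with torch.no_grad():
    stoi, pesq, si_sdr = model(waveform[:1])    # batch of one mono channel
print(float(stoi[0]), float(pesq[0]), float(si_sdr[0]))
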
nkululeko/feats_trill.py DELETED
@@ -1,74 +0,0 @@
-# feats_trill.py
-from numpy.core.numeric import tensordot
-from nkululeko.featureset import Featureset
-import pandas as pd
-from nkululeko.util import Util
-import nkululeko.glob_conf as glob_conf
-import audiofile as af
-import os
-import tensorflow as tf
-# Import TF 2.X and make sure we're running eager.
-assert tf.executing_eagerly()
-import tensorflow_hub as hub
-
-class TRILLset(Featureset):
-    """A feature extractor for the Google TRILL embeddings"""
-    """https://ai.googleblog.com/2020/06/improving-speech-representations-and.html"""
-
-    # Initialization of the class
-    def __init__(self, name, data_df):
-        """
-        Initialize the class with name, data and Util instance
-        Also loads the model from hub
-
-        :param name: Name of the class
-        :type name: str
-        :param data_df: Data of the class
-        :type data_df: DataFrame
-        :return: None
-        """
-        super().__init__(name, data_df)
-        # Load the model from the configured path
-        model_path = self.util.config_val('FEATS', 'trill.model', \
-            'https://tfhub.dev/google/nonsemantic-speech-benchmark/trill/3')
-        self.module = hub.load(model_path)
-
-
-    def extract(self):
-        store = self.util.get_path('store')
-        storage = f'{store}{self.name}.pkl'
-        extract = self.util.config_val('FEATS', 'needs_feature_extraction', False)
-        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
-        if extract or no_reuse or not os.path.isfile(storage):
-            self.util.debug('extracting TRILL embeddings, this might take a while...')
-            emb_series = pd.Series(index = self.data_df.index, dtype=object)
-            length = len(self.data_df.index)
-            for idx, file in enumerate(self.data_df.index.get_level_values(0)):
-                emb = self.getEmbeddings(file)
-                emb_series[idx] = emb
-                if idx%10==0:
-                    self.util.debug(f'TRILL: {idx} of {length} done')
-            self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
-            self.df.to_pickle(storage)
-            try:
-                glob_conf.config['DATA']['needs_feature_extraction'] = 'false'
-            except KeyError:
-                pass
-        else:
-            self.util.debug('reusing extracted TRILL embeddings')
-            self.df = pd.read_pickle(storage)
-
-    def embed_wav(self, wav):
-        if len(wav.shape) > 1:
-            wav = tf.reduce_mean(wav, axis=0)
-
-        emb_dict = self.module(samples=wav, sample_rate=tf.constant(16000))
-        return emb_dict['embedding']
-
-    def getEmbeddings(self, file):
-        wav = af.read(file)[0]
-        wav = tf.convert_to_tensor(wav)
-        emb_short = self.embed_wav(wav)
-        # you get one embedding per frame, we use the mean for all the frames
-        emb_short = emb_short.numpy().mean(axis=0)
-        return emb_short
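
The deleted TRILL extractor reduces the per-frame embeddings returned by the TF-Hub module to one vector per clip by averaging over frames. A minimal sketch of that call; the hub URL is the default from the deleted code, and the input here is random audio used purely for illustration.

# Sketch: load the TRILL module from TF-Hub and mean-pool its frame-wise embeddings,
# mirroring embed_wav()/getEmbeddings() in the deleted TRILLset class.
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub

module = hub.load("https://tfhub.dev/google/nonsemantic-speech-benchmark/trill/3")

wav = tf.convert_to_tensor(
    np.random.uniform(-1, 1, 16000), dtype=tf.float32)        # 1 s of fake 16 kHz audio
emb_dict = module(samples=wav, sample_rate=tf.constant(16000))
embedding = emb_dict["embedding"].numpy().mean(axis=0)        # one vector for the whole clip
print(embedding.shape)
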
nkululeko/feats_wav2vec2.py DELETED
@@ -1,94 +0,0 @@
-# feats_wav2vec2.py
-
-from nkululeko.util import Util
-from nkululeko.featureset import Featureset
-import os
-import pandas as pd
-import os
-import nkululeko.glob_conf as glob_conf
-import transformers
-from transformers.models.wav2vec2.modeling_wav2vec2 import Wav2Vec2Model
-import torch
-
-import audiofile
-
-class Wav2vec2(Featureset):
-    """Class to extract wav2vec2 embeddings (https://huggingface.co/facebook/wav2vec2-large-robust-ft-swbd-300h)"""
-
-    def __init__(self, name, data_df):
-        """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
-        super().__init__(name, data_df)
-        self.device = self.util.config_val('MODEL', 'device', 'cpu')
-        self.model_initialized = False
-
-
-
-    def init_model(self):
-        # load model
-        self.util.debug('loading wav2vec model...')
-        model_path = self.util.config_val('FEATS', 'wav2vec.model', 'wav2vec2-large-robust-ft-swbd-300h')
-        self.processor = transformers.Wav2Vec2Processor.from_pretrained(model_path)
-        self.model = Wav2Vec2Model.from_pretrained(model_path).to(self.device)
-        print(f'intialized vec model on {self.device}')
-        self.model.eval()
-        self.model_initialized = True
-
-
-    def extract(self):
-        """Extract the features or load them from disk if present."""
-        store = self.util.get_path('store')
-        storage = f'{store}{self.name}.pkl'
-        extract = self.util.config_val('FEATS', 'needs_feature_extraction', False)
-        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
-        if extract or no_reuse or not os.path.isfile(storage):
-            if not self.model_initialized:
-                self.init_model()
-            self.util.debug('extracting wav2vec2 embeddings, this might take a while...')
-            emb_series = pd.Series(index = self.data_df.index, dtype=object)
-            length = len(self.data_df.index)
-            for idx, (file, start, end) in enumerate(self.data_df.index.to_list()):
-                signal, sampling_rate = audiofile.read(file, offset=start.total_seconds(), duration=(end-start).total_seconds(), always_2d=True)
-                #signal, sampling_rate = audiofile.read(audio_path, always_2d=True)
-                emb = self.get_embeddings(signal, sampling_rate)
-                emb_series[idx] = emb
-                if idx%10==0:
-                    self.util.debug(f'Wav2vec2: {idx} of {length} done')
-            self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
-            self.df.to_pickle(storage)
-            try:
-                glob_conf.config['DATA']['needs_feature_extraction'] = 'false'
-            except KeyError:
-                pass
-        else:
-            self.util.debug('reusing extracted wav2vec2 embeddings')
-            self.df = pd.read_pickle(storage)
-            if self.df.isnull().values.any():
-                nanrows = self.df.columns[self.df.isna().any()].tolist()
-                print(nanrows)
-                self.util.error(f'got nan: {self.df.shape} {self.df.isnull().sum().sum()}')
-
-
-    def get_embeddings(self, signal, sampling_rate):
-        r"""Extract embeddings from raw audio signal."""
-        with torch.no_grad():
-            # run through processor to normalize signal
-            # always returns a batch, so we just get the first entry
-            # then we put it on the device
-            y = self.processor(signal, sampling_rate=sampling_rate)
-            y = y['input_values'][0]
-            y = torch.from_numpy(y).to(self.device)
-
-            # run through model
-            # first entry contains hidden state
-            y = self.model(y)[0]
-
-            # pool result and convert to numpy
-            y = torch.mean(y, dim=1)
-            y = y.detach().cpu().numpy()
-
-        return y.flatten()
-
-    def extract_sample(self, signal, sr):
-        self.init_model()
-        feats = self.get_embeddings(signal, sr)
-        return feats
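
The deleted wav2vec2 extractor produces one utterance-level vector by mean-pooling the model's last hidden states over time. A minimal sketch with the transformers API; the checkpoint is the one named in the class docstring above, and the input is random audio used purely for illustration.

# Sketch: one mean-pooled wav2vec2 embedding per signal, following
# Wav2vec2.get_embeddings() in the deleted class.
import numpy as np
import torch
from transformers import Wav2Vec2Model, Wav2Vec2Processor

model_name = "facebook/wav2vec2-large-robust-ft-swbd-300h"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2Model.from_pretrained(model_name)
model.eval()

signal = np.random.uniform(-1, 1, 16000).astype(np.float32)   # 1 s of fake 16 kHz audio

with torch.no_grad():
    # the processor normalizes the signal and returns a batch of size one
    inputs = processor(signal, sampling_rate=16000, return_tensors="pt")
    hidden = model(inputs.input_values).last_hidden_state      # (batch, frames, features)
    embedding = hidden.mean(dim=1).squeeze().numpy()           # average over frames

print(embedding.shape)
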
nkululeko/featureset.py DELETED
@@ -1,41 +0,0 @@
-# featureset.py
-import pandas as pd
-from nkululeko.util import Util
-import nkululeko.glob_conf as glob_conf
-import ast
-
-class Featureset:
-    name = '' # designation
-    df = None # pandas dataframe to store the features (and indexed with the data from the sets)
-    data_df = None # dataframe to get audio paths
-
-
-    def __init__(self, name, data_df):
-        self.name = name
-        self.data_df = data_df
-        self.util = Util('featureset')
-
-    def extract(self):
-        pass
-
-    def filter(self):
-        # use only the features that are indexed in the target dataframes
-        self.df = self.df[self.df.index.isin(self.data_df.index)]
-        try:
-            # use only some features
-            selected_features = ast.literal_eval(glob_conf.config['FEATS']['features'])
-            self.util.debug(f'selecting features: {selected_features}')
-            sel_feats_df = pd.DataFrame()
-            hit = False
-            for feat in selected_features:
-                try:
-                    sel_feats_df[feat] = self.df[feat]
-                    hit = True
-                except KeyError:
-                    pass
-            if hit:
-                self.df = sel_feats_df
-                self.util.debug(f'new feats shape after selecting features: {self.df.shape}')
-        except KeyError:
-            pass
-
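
Featureset was the common base class of the extractors listed above: the constructor stores self.name, self.data_df and a Util helper, subclasses override extract() to fill self.df indexed like data_df, and filter() is inherited. A minimal hypothetical subclass, just to show that contract; the duration "feature" and the class name are made up, and the import path is the 0.59.1 one (the module is removed in 0.61.0).

# Sketch: the contract a Featureset subclass had to fulfil - set self.df in
# extract(), indexed like self.data_df. "DurationSet" is a made-up example.
import audiofile
import pandas as pd
from nkululeko.featureset import Featureset  # import path as of 0.59.1 (removed in 0.61.0)


class DurationSet(Featureset):
    """Hypothetical extractor returning each file's duration as its only feature."""

    def extract(self):
        durations = [audiofile.duration(file)
                     for file in self.data_df.index.get_level_values(0)]
        self.df = pd.DataFrame({"duration": durations}, index=self.data_df.index)
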