nkululeko 0.57.0__tar.gz → 0.58.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. {nkululeko-0.57.0 → nkululeko-0.58.0}/CHANGELOG.md +6 -0
  2. {nkululeko-0.57.0/nkululeko.egg-info → nkululeko-0.58.0}/PKG-INFO +15 -3
  3. {nkululeko-0.57.0 → nkululeko-0.58.0}/README.md +8 -2
  4. nkululeko-0.58.0/nkululeko/ap_dominance.py +29 -0
  5. nkululeko-0.58.0/nkululeko/ap_mos.py +35 -0
  6. nkululeko-0.58.0/nkululeko/ap_pesq.py +35 -0
  7. nkululeko-0.58.0/nkululeko/constants.py +1 -0
  8. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/dataset.py +0 -1
  9. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/experiment.py +15 -2
  10. nkululeko-0.58.0/nkululeko/feats_mos.py +92 -0
  11. nkululeko-0.58.0/nkululeko/feats_pesq.py +89 -0
  12. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/feature_extractor.py +6 -0
  13. {nkululeko-0.57.0 → nkululeko-0.58.0/nkululeko.egg-info}/PKG-INFO +15 -3
  14. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko.egg-info/SOURCES.txt +5 -0
  15. nkululeko-0.57.0/nkululeko/constants.py +0 -1
  16. {nkululeko-0.57.0 → nkululeko-0.58.0}/LICENSE +0 -0
  17. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/__init__.py +0 -0
  18. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/ap_age.py +0 -0
  19. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/ap_arousal.py +0 -0
  20. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/ap_gender.py +0 -0
  21. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/ap_snr.py +0 -0
  22. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/ap_valence.py +0 -0
  23. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/augment.py +0 -0
  24. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/augmenter.py +0 -0
  25. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/balancer.py +0 -0
  26. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/cacheddataset.py +0 -0
  27. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/dataset_csv.py +0 -0
  28. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/dataset_ravdess.py +0 -0
  29. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/demo.py +0 -0
  30. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/demo_predictor.py +0 -0
  31. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/estimate_snr.py +0 -0
  32. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/explore.py +0 -0
  33. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/feats_agender.py +0 -0
  34. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/feats_agender_agender.py +0 -0
  35. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/feats_analyser.py +0 -0
  36. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/feats_audmodel.py +0 -0
  37. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/feats_audmodel_dim.py +0 -0
  38. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/feats_clap.py +0 -0
  39. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/feats_import.py +0 -0
  40. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/feats_mld.py +0 -0
  41. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/feats_opensmile.py +0 -0
  42. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/feats_oxbow.py +0 -0
  43. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/feats_praat.py +0 -0
  44. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/feats_snr.py +0 -0
  45. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/feats_trill.py +0 -0
  46. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/feats_wav2vec2.py +0 -0
  47. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/featureset.py +0 -0
  48. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/feinberg_praat.py +0 -0
  49. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/file_checker.py +0 -0
  50. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/filter_data.py +0 -0
  51. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/glob_conf.py +0 -0
  52. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/loss_ccc.py +0 -0
  53. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/loss_softf1loss.py +0 -0
  54. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/model.py +0 -0
  55. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/model_bayes.py +0 -0
  56. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/model_cnn.py +0 -0
  57. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/model_gmm.py +0 -0
  58. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/model_knn.py +0 -0
  59. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/model_knn_reg.py +0 -0
  60. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/model_mlp.py +0 -0
  61. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/model_mlp_regression.py +0 -0
  62. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/model_svm.py +0 -0
  63. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/model_svr.py +0 -0
  64. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/model_tree.py +0 -0
  65. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/model_tree_reg.py +0 -0
  66. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/model_xgb.py +0 -0
  67. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/model_xgr.py +0 -0
  68. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/modelrunner.py +0 -0
  69. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/nkululeko.py +0 -0
  70. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/plots.py +0 -0
  71. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/predict.py +0 -0
  72. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/randomsplicer.py +0 -0
  73. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/randomsplicing.py +0 -0
  74. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/reporter.py +0 -0
  75. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/result.py +0 -0
  76. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/runmanager.py +0 -0
  77. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/scaler.py +0 -0
  78. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/segment.py +0 -0
  79. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/syllable_nuclei.py +0 -0
  80. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/test.py +0 -0
  81. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/test_predictor.py +0 -0
  82. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko/util.py +0 -0
  83. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko.egg-info/dependency_links.txt +0 -0
  84. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko.egg-info/requires.txt +0 -0
  85. {nkululeko-0.57.0 → nkululeko-0.58.0}/nkululeko.egg-info/top_level.txt +0 -0
  86. {nkululeko-0.57.0 → nkululeko-0.58.0}/pyproject.toml +0 -0
  87. {nkululeko-0.57.0 → nkululeko-0.58.0}/setup.cfg +0 -0
  88. {nkululeko-0.57.0 → nkululeko-0.58.0}/setup.py +0 -0
@@ -1,6 +1,12 @@
1
1
  Changelog
2
2
  =========
3
3
 
4
+ Version 0.58.0
5
+ --------------
6
+ * added dominance predict
7
+ * added MOS predict
8
+ * added PESQ predict
9
+
4
10
  Version 0.57.0
5
11
  --------------
6
12
  * renamed autopredict predict
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.57.0
3
+ Version: 0.58.0
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -17,6 +17,7 @@ License-File: LICENSE
17
17
  # Nkululeko
18
18
  * [Overview](#overview)
19
19
  * [Installation](#installation)
20
+ * [Documentation](https://nkululeko.readthedocs.io)
20
21
  * [Usage](#usage)
21
22
  * [Hello World](#hello-world-example)
22
23
  * [Licence](#licence)
@@ -66,7 +67,11 @@ Sometimes you only want to take a look at your data:
66
67
 
67
68
  <img src="meta/images/data_plot.png" width="500px"/>
68
69
 
69
- ## Installatione
70
+
71
+ ## Documentation
72
+ The documentation, along with more detail on installation, usage, the INI file format, and examples, can be found at [nkululeko.readthedocs.io](https://nkululeko.readthedocs.io).
73
+
74
+ ## Installation
70
75
 
71
76
  Create and activate a virtual Python environment and simply run
72
77
  ```
@@ -106,10 +111,11 @@ Read the [Hello World example](#hello-world-example) for initial usage with Emo-
106
111
 
107
112
  Here is an overview of the interfaces:
108
113
  * **nkululeko.nkululeko**: doing experiments
109
- * **nkululeko.demo**: demo the current best model on command line
114
+ * **nkululeko.demo**: demo the current best model on the command line
110
115
  * **nkululeko.test**: predict a series of files with the current best model
111
116
  * **nkululeko.explore**: perform data exploration
112
117
  * **nkululeko.augment**: augment the current training data
118
+ * **nkululeko.predict**: predict a series of files with a given model
113
119
 
114
120
  Alternatively, there is a central "experiment" class that can be used by own experiments
115
121
 
@@ -217,6 +223,12 @@ Nkululeko can be used under the [MIT license](https://choosealicense.com/license
217
223
  Changelog
218
224
  =========
219
225
 
226
+ Version 0.58.0
227
+ --------------
228
+ * added dominance predict
229
+ * added MOS predict
230
+ * added PESQ predict
231
+
220
232
  Version 0.57.0
221
233
  --------------
222
234
  * renamed autopredict predict
@@ -1,6 +1,7 @@
1
1
  # Nkululeko
2
2
  * [Overview](#overview)
3
3
  * [Installation](#installation)
4
+ * [Documentation](https://nkululeko.readthedocs.io)
4
5
  * [Usage](#usage)
5
6
  * [Hello World](#hello-world-example)
6
7
  * [Licence](#licence)
@@ -50,7 +51,11 @@ Sometimes you only want to take a look at your data:
50
51
 
51
52
  <img src="meta/images/data_plot.png" width="500px"/>
52
53
 
53
- ## Installatione
54
+
55
+ ## Documentation
56
+ The documentation, along with more detail on installation, usage, the INI file format, and examples, can be found at [nkululeko.readthedocs.io](https://nkululeko.readthedocs.io).
57
+
58
+ ## Installation
54
59
 
55
60
  Create and activate a virtual Python environment and simply run
56
61
  ```
@@ -90,10 +95,11 @@ Read the [Hello World example](#hello-world-example) for initial usage with Emo-
90
95
 
91
96
  Here is an overview of the interfaces:
92
97
  * **nkululeko.nkululeko**: doing experiments
93
- * **nkululeko.demo**: demo the current best model on command line
98
+ * **nkululeko.demo**: demo the current best model on the command line
94
99
  * **nkululeko.test**: predict a series of files with the current best model
95
100
  * **nkululeko.explore**: perform data exploration
96
101
  * **nkululeko.augment**: augment the current training data
102
+ * **nkululeko.predict**: predict a series of files with a given model
97
103
 
98
104
  Alternatively, there is a central "experiment" class that can be used by own experiments
99
105
 
@@ -0,0 +1,29 @@
1
+ """
2
+ A predictor for emotional dominance.
3
+ Currently based on audEERING's emotional dimension model.
4
+ """
5
+
6
+ from nkululeko.util import Util
7
+ from nkululeko.feature_extractor import FeatureExtractor
8
+ import ast
9
+ import nkululeko.glob_conf as glob_conf
10
class DominancePredictor:
    """Add a predicted emotional-dominance column to a dataframe.

    The values are taken from the ``dominance`` column produced by the
    ``auddim`` feature extractor.
    """

    def __init__(self, df):
        """Remember the dataframe to annotate and set up a logger helper."""
        self.df = df
        self.util = Util('dominancePredictor')

    def predict(self, split_selection):
        """Return a copy of the dataframe with a ``dominance_pred`` column.

        Args:
            split_selection: label of the sample selection; used for the
                debug message and handed on to the feature extractor.

        Returns:
            A copy of ``self.df`` with the additional column
            ``dominance_pred``, truncated to three decimal places.
        """
        self.util.debug(f'predicting dominance for {split_selection} samples')
        # the feature store is named after all configured databases
        database_names = ast.literal_eval(glob_conf.config['DATA']['databases'])
        feats_name = "_".join(database_names)
        self.feature_extractor = FeatureExtractor(
            self.df, ['auddim'], feats_name, split_selection
        )
        features = self.feature_extractor.extract()
        # scale, cut off the fraction, scale back: keeps three decimals
        scaled = features.dominance * 1000
        annotated_df = self.df.copy()
        annotated_df['dominance_pred'] = scaled.astype('int') / 1000

        return annotated_df
@@ -0,0 +1,35 @@
1
+ """"
2
+ A predictor for MOS - mean opinion score.
3
+ """
4
+ from nkululeko.util import Util
5
+ import ast
6
+ import nkululeko.glob_conf as glob_conf
7
+ from nkululeko.feature_extractor import FeatureExtractor
8
+ import numpy as np
9
+
10
+
11
class MOSPredictor:
    """Add a predicted MOS (mean opinion score) column to a dataframe.

    The values are taken from the ``mos`` column produced by the ``mos``
    feature extractor.
    """

    def __init__(self, df):
        """Remember the dataframe to annotate and set up a logger helper."""
        self.df = df
        self.util = Util('mosPredictor')

    def predict(self, split_selection):
        """Return a copy of the dataframe with a ``mos_pred`` column.

        Args:
            split_selection: label of the sample selection; used for the
                debug message and handed on to the feature extractor.

        Returns:
            A copy of ``self.df`` with the additional column ``mos_pred``,
            truncated to two decimal places. Missing and infinite
            predictions are reported as 0.
        """
        self.util.debug(f'estimating MOS for {split_selection} samples')
        annotated_df = self.df.copy()
        # the feature store is named after all configured databases
        database_names = ast.literal_eval(glob_conf.config['DATA']['databases'])
        feats_name = "_".join(database_names)
        self.feature_extractor = FeatureExtractor(
            self.df, ['mos'], feats_name, split_selection
        )
        extracted = self.feature_extractor.extract()
        # missing values and +/- infinity are all mapped to 0
        cleaned = extracted.fillna(0).replace([np.inf, -np.inf], 0)
        # scale, cut off the fraction, scale back: keeps two decimals
        scaled = cleaned.mos * 100
        annotated_df['mos_pred'] = scaled.astype('int') / 100
        return annotated_df
35
+
@@ -0,0 +1,35 @@
1
+ """"
2
+ A predictor for PESQ - Perceptual Evaluation of Speech Quality.
3
+ """
4
+ from nkululeko.util import Util
5
+ import ast
6
+ import nkululeko.glob_conf as glob_conf
7
+ from nkululeko.feature_extractor import FeatureExtractor
8
+ import numpy as np
9
+
10
+
11
class PESQPredictor:
    """Add a predicted PESQ column to a dataframe.

    PESQ stands for Perceptual Evaluation of Speech Quality. The values are
    taken from the ``pesq`` column produced by the ``pesq`` feature extractor.
    """

    def __init__(self, df):
        """Remember the dataframe to annotate and set up a logger helper."""
        self.df = df
        self.util = Util('pesqPredictor')

    def predict(self, split_selection):
        """Return a copy of the dataframe with a ``pesq_pred`` column.

        Args:
            split_selection: label of the sample selection; used for the
                debug message and handed on to the feature extractor.

        Returns:
            A copy of ``self.df`` with the additional column ``pesq_pred``,
            truncated to two decimal places. Missing and infinite
            predictions are reported as 0.
        """
        self.util.debug(f'estimating PESQ for {split_selection} samples')
        annotated_df = self.df.copy()
        # the feature store is named after all configured databases
        database_names = ast.literal_eval(glob_conf.config['DATA']['databases'])
        feats_name = "_".join(database_names)
        self.feature_extractor = FeatureExtractor(
            self.df, ['pesq'], feats_name, split_selection
        )
        extracted = self.feature_extractor.extract()
        # missing values and +/- infinity are all mapped to 0
        cleaned = extracted.fillna(0).replace([np.inf, -np.inf], 0)
        # scale, cut off the fraction, scale back: keeps two decimals
        scaled = cleaned.pesq * 100
        annotated_df['pesq_pred'] = scaled.astype('int') / 100
        return annotated_df
35
+
@@ -0,0 +1 @@
1
+ VERSION = '0.58.0'
@@ -67,7 +67,6 @@ class Dataset:
67
67
  # store the dataframe
68
68
  store = self.util.get_path('store')
69
69
  store_file = f'{store}{self.name}.pkl'
70
- self.util.debug(f'{self.name}: loading ...')
71
70
  self.root = self._load_db()
72
71
  # self.got_speaker, self.got_gender = False, False
73
72
  if not self.start_fresh and os.path.isfile(store_file):
@@ -82,7 +82,8 @@ class Experiment:
82
82
  self.got_speaker = True
83
83
  self.datasets.update({d: data})
84
84
  self.target = self.util.config_val('DATA', 'target', 'emotion')
85
- self.util.debug(f'loaded databases {self.datasets.keys()}')
85
+ dbs = ','.join(list(self.datasets.keys()))
86
+ self.util.debug(f'loaded databases {dbs}')
86
87
 
87
88
  def _import_csv(self, storage):
88
89
  # df = pd.read_csv(storage, header=0, index_col=[0,1,2])
@@ -353,6 +354,14 @@ class Experiment:
353
354
  from nkululeko.ap_snr import SNRPredictor
354
355
  predictor = SNRPredictor(df)
355
356
  df = predictor.predict(sample_selection)
357
+ elif target == 'mos':
358
+ from nkululeko.ap_mos import MOSPredictor
359
+ predictor = MOSPredictor(df)
360
+ df = predictor.predict(sample_selection)
361
+ elif target == 'pesq':
362
+ from nkululeko.ap_pesq import PESQPredictor
363
+ predictor = PESQPredictor(df)
364
+ df = predictor.predict(sample_selection)
356
365
  elif target == 'arousal':
357
366
  from nkululeko.ap_arousal import ArousalPredictor
358
367
  predictor = ArousalPredictor(df)
@@ -361,8 +370,12 @@ class Experiment:
361
370
  from nkululeko.ap_valence import ValencePredictor
362
371
  predictor = ValencePredictor(df)
363
372
  df = predictor.predict(sample_selection)
373
+ elif target == 'dominance':
374
+ from nkululeko.ap_dominance import DominancePredictor
375
+ predictor = DominancePredictor(df)
376
+ df = predictor.predict(sample_selection)
364
377
  else:
365
- self.util.error(f'unkown auto predict target: {target}')
378
+ self.util.error(f'unknown auto predict target: {target}')
366
379
  return df
367
380
 
368
381
  def random_splice(self):
@@ -0,0 +1,92 @@
1
+ """ feats_mos.py
2
+ predict MOS (mean opinion score)
3
+
4
+ adapted from
5
+ https://pytorch.org/audio/main/tutorials/squim_tutorial.html#sphx-glr-tutorials-squim-tutorial-py
6
+ paper: https://arxiv.org/pdf/2304.01448.pdf
7
+
8
+ needs
9
+ pip uninstall -y torch torchvision torchaudio
10
+ pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
11
+
12
+ """
13
+ from nkululeko.util import Util
14
+ from nkululeko.featureset import Featureset
15
+ import os
16
+ import pandas as pd
17
+ import os
18
+ import nkululeko.glob_conf as glob_conf
19
+ import audiofile
20
+ import torch
21
+ import torchaudio
22
+ from torchaudio.pipelines import SQUIM_SUBJECTIVE
23
+ from torchaudio.utils import download_asset
24
+
25
class MOSSet(Featureset):
    """Feature set with one predicted MOS (mean opinion score) per sample.

    Predictions come from torchaudio's ``SQUIM_SUBJECTIVE`` model, which is
    called with the speech waveform and a fixed reference waveform.
    """

    def __init__(self, name, data_df):
        """Initialise the feature set; the model itself is loaded lazily.

        Args:
            name: identifier of this feature set, used in the store file name.
            data_df: dataframe whose index entries unpack to
                ``(file, start, end)``.
        """
        super().__init__(name, data_df)
        # NOTE(review): the device is read here but not applied to the model
        # anywhere in this class - confirm whether that is intended.
        self.device = self.util.config_val('MODEL', 'device', 'cpu')
        self.model_initialized = False

    def init_model(self):
        """Load the MOS model and the reference waveform it is compared to."""
        self.util.debug('loading MOS model...')
        self.subjective_model = SQUIM_SUBJECTIVE.get_model()
        nmr_speech = download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
        # the sample rate of the reference is not needed afterwards
        self.WAVEFORM_NMR, _ = torchaudio.load(nmr_speech)
        self.model_initialized = True

    def extract(self):
        """Predict the MOS values, or load them from disk if present.

        The result is stored in ``self.df`` as a single column named ``mos``
        that shares the index of ``self.data_df``. Freshly predicted values
        are written to the store file.
        """
        store = self.util.get_path('store')
        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
        storage = f'{store}{self.name}.{store_format}'
        extract = self.util.config_val('FEATS', 'needs_feature_extraction', False)
        # NOTE(review): eval() is applied to a value from the configuration;
        # only safe as long as the configuration file is trusted.
        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
        if extract or no_reuse or not os.path.isfile(storage):
            if not self.model_initialized:
                self.init_model()
            self.util.debug('predicting MOS, this might take a while...')
            length = len(self.data_df.index)
            # Collect into a plain list: assigning by integer position into a
            # Series that has a non-integer index relies on a deprecated
            # positional fallback.
            mos_values = []
            for idx, (file, start, end) in enumerate(self.data_df.index.to_list()):
                signal, sampling_rate = audiofile.read(
                    file,
                    offset=start.total_seconds(),
                    duration=(end - start).total_seconds(),
                    always_2d=True,
                )
                mos_values.append(self.get_embeddings(signal, sampling_rate))
                if idx % 10 == 0:
                    self.util.debug(f'MOS: {idx} of {length} done')
            self.df = pd.DataFrame({'mos': mos_values}, index=self.data_df.index)
            self.util.write_store(self.df, storage, store_format)
            # NOTE(review): the flag is read from section FEATS above but
            # reset in section DATA here - confirm which one is intended.
            try:
                glob_conf.config['DATA']['needs_feature_extraction'] = 'false'
            except KeyError:
                # best effort: nothing to reset if the section is missing
                pass
        else:
            self.util.debug('reusing predicted MOS values')
            self.df = self.util.get_store(storage, store_format)
            if self.df.isnull().values.any():
                # names of the columns that contain at least one NaN
                nan_columns = self.df.columns[self.df.isna().any()].tolist()
                print(nan_columns)
                self.util.error(f'got nan: {self.df.shape} {self.df.isnull().sum().sum()}')

    def get_embeddings(self, signal, sampling_rate):
        """Return the predicted MOS for one signal as a float.

        The signal is written to a WAV file and read back with torchaudio,
        as before, but inside a private temporary directory. Parallel runs
        therefore cannot overwrite each other's file, and nothing is left
        behind in the working directory.

        Args:
            signal: audio samples as accepted by ``audiofile.write``.
            sampling_rate: sampling rate of ``signal`` in Hz.
        """
        # local import keeps this block self-contained
        import tempfile

        # NOTE(review): the signal is passed on at its own sampling rate;
        # presumably the model expects a fixed rate - confirm against the
        # torchaudio documentation.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_audio_name = os.path.join(tmp_dir, 'mos_audio_tmp.wav')
            audiofile.write(tmp_audio_name, signal, sampling_rate)
            waveform_speech, _ = torchaudio.load(tmp_audio_name)
        with torch.no_grad():
            mos = self.subjective_model(waveform_speech, self.WAVEFORM_NMR)
        return float(mos[0].numpy())

    def extract_sample(self, signal, sr):
        """Predict the MOS for a single signal, loading the model if needed."""
        # load model and reference only once instead of on every call
        if not self.model_initialized:
            self.init_model()
        feats = self.get_embeddings(signal, sr)
        return feats
@@ -0,0 +1,89 @@
1
+ """ feats_pesq.py
2
+ predict PESQ (Perceptual Evaluation of Speech Quality)
3
+
4
+ adapted from
5
+ https://pytorch.org/audio/main/tutorials/squim_tutorial.html#sphx-glr-tutorials-squim-tutorial-py
6
+ paper: https://arxiv.org/pdf/2304.01448.pdf
7
+
8
+ needs
9
+ pip uninstall -y torch torchvision torchaudio
10
+ pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
11
+
12
+ """
13
+ from nkululeko.util import Util
14
+ from nkululeko.featureset import Featureset
15
+ import os
16
+ import pandas as pd
17
+ import os
18
+ import nkululeko.glob_conf as glob_conf
19
+ import audiofile
20
+ import torch
21
+ import torchaudio
22
+ from torchaudio.pipelines import SQUIM_OBJECTIVE
23
+
24
class PESQSet(Featureset):
    """Feature set with one predicted PESQ value per sample.

    PESQ stands for Perceptual Evaluation of Speech Quality. Predictions come
    from torchaudio's ``SQUIM_OBJECTIVE`` model, whose second output is used.
    """

    def __init__(self, name, data_df):
        """Initialise the feature set; the model itself is loaded lazily.

        Args:
            name: identifier of this feature set, used in the store file name.
            data_df: dataframe whose index entries unpack to
                ``(file, start, end)``.
        """
        super().__init__(name, data_df)
        # NOTE(review): the device is read here but not applied to the model
        # anywhere in this class - confirm whether that is intended.
        self.device = self.util.config_val('MODEL', 'device', 'cpu')
        self.model_initialized = False

    def init_model(self):
        """Load the objective speech quality model."""
        self.util.debug('loading model...')
        self.objective_model = SQUIM_OBJECTIVE.get_model()
        self.model_initialized = True

    def extract(self):
        """Predict the PESQ values, or load them from disk if present.

        The result is stored in ``self.df`` as a single column named ``pesq``
        that shares the index of ``self.data_df``. Freshly predicted values
        are written to the store file.
        """
        store = self.util.get_path('store')
        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
        storage = f'{store}{self.name}.{store_format}'
        extract = self.util.config_val('FEATS', 'needs_feature_extraction', False)
        # NOTE(review): eval() is applied to a value from the configuration;
        # only safe as long as the configuration file is trusted.
        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
        if extract or no_reuse or not os.path.isfile(storage):
            if not self.model_initialized:
                self.init_model()
            self.util.debug('predicting PESQ, this might take a while...')
            length = len(self.data_df.index)
            # Collect into a plain list: assigning by integer position into a
            # Series that has a non-integer index relies on a deprecated
            # positional fallback.
            pesq_values = []
            for idx, (file, start, end) in enumerate(self.data_df.index.to_list()):
                signal, sampling_rate = audiofile.read(
                    file,
                    offset=start.total_seconds(),
                    duration=(end - start).total_seconds(),
                    always_2d=True,
                )
                pesq_values.append(self.get_embeddings(signal, sampling_rate))
                if idx % 10 == 0:
                    self.util.debug(f'PESQ: {idx} of {length} done')
            self.df = pd.DataFrame({'pesq': pesq_values}, index=self.data_df.index)
            self.util.write_store(self.df, storage, store_format)
            # NOTE(review): the flag is read from section FEATS above but
            # reset in section DATA here - confirm which one is intended.
            try:
                glob_conf.config['DATA']['needs_feature_extraction'] = 'false'
            except KeyError:
                # best effort: nothing to reset if the section is missing
                pass
        else:
            self.util.debug('reusing predicted PESQ values')
            self.df = self.util.get_store(storage, store_format)
            if self.df.isnull().values.any():
                # names of the columns that contain at least one NaN
                nan_columns = self.df.columns[self.df.isna().any()].tolist()
                print(nan_columns)
                self.util.error(f'got nan: {self.df.shape} {self.df.isnull().sum().sum()}')

    def get_embeddings(self, signal, sampling_rate):
        """Return the predicted PESQ for one signal as a float.

        The signal is written to a WAV file and read back with torchaudio,
        as before, but inside a private temporary directory. Parallel runs
        therefore cannot overwrite each other's file, and nothing is left
        behind in the working directory.

        Args:
            signal: audio samples as accepted by ``audiofile.write``.
            sampling_rate: sampling rate of ``signal`` in Hz.
        """
        # local import keeps this block self-contained
        import tempfile

        # NOTE(review): the signal is passed on at its own sampling rate;
        # presumably the model expects a fixed rate - confirm against the
        # torchaudio documentation.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_audio_name = os.path.join(tmp_dir, 'pesq_audio_tmp.wav')
            audiofile.write(tmp_audio_name, signal, sampling_rate)
            waveform_speech, _ = torchaudio.load(tmp_audio_name)
        with torch.no_grad():
            # the model returns three estimates; only the second is used
            _stoi_hyp, pesq_hyp, _si_sdr_hyp = self.objective_model(waveform_speech)
        return float(pesq_hyp[0].numpy())

    def extract_sample(self, signal, sr):
        """Predict the PESQ for a single signal, loading the model if needed."""
        # load the model only once instead of on every call
        if not self.model_initialized:
            self.init_model()
        feats = self.get_embeddings(signal, sr)
        return feats
@@ -61,6 +61,12 @@ class FeatureExtractor:
61
61
  elif feats_type=='snr':
62
62
  from nkululeko.feats_snr import SNRSet
63
63
  self.featExtractor = SNRSet(f'{store_name}_{self.feats_designation}', self.data_df)
64
+ elif feats_type=='mos':
65
+ from nkululeko.feats_mos import MOSSet
66
+ self.featExtractor = MOSSet(f'{store_name}_{self.feats_designation}', self.data_df)
67
+ elif feats_type=='pesq':
68
+ from nkululeko.feats_pesq import PESQSet
69
+ self.featExtractor = PESQSet(f'{store_name}_{self.feats_designation}', self.data_df)
64
70
  elif feats_type=='clap':
65
71
  from nkululeko.feats_clap import Clap
66
72
  self.featExtractor = Clap(f'{store_name}_{self.feats_designation}', self.data_df)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.57.0
3
+ Version: 0.58.0
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -17,6 +17,7 @@ License-File: LICENSE
17
17
  # Nkululeko
18
18
  * [Overview](#overview)
19
19
  * [Installation](#installation)
20
+ * [Documentation](https://nkululeko.readthedocs.io)
20
21
  * [Usage](#usage)
21
22
  * [Hello World](#hello-world-example)
22
23
  * [Licence](#licence)
@@ -66,7 +67,11 @@ Sometimes you only want to take a look at your data:
66
67
 
67
68
  <img src="meta/images/data_plot.png" width="500px"/>
68
69
 
69
- ## Installatione
70
+
71
+ ## Documentation
72
+ The documentation, along with more detail on installation, usage, the INI file format, and examples, can be found at [nkululeko.readthedocs.io](https://nkululeko.readthedocs.io).
73
+
74
+ ## Installation
70
75
 
71
76
  Create and activate a virtual Python environment and simply run
72
77
  ```
@@ -106,10 +111,11 @@ Read the [Hello World example](#hello-world-example) for initial usage with Emo-
106
111
 
107
112
  Here is an overview of the interfaces:
108
113
  * **nkululeko.nkululeko**: doing experiments
109
- * **nkululeko.demo**: demo the current best model on command line
114
+ * **nkululeko.demo**: demo the current best model on the command line
110
115
  * **nkululeko.test**: predict a series of files with the current best model
111
116
  * **nkululeko.explore**: perform data exploration
112
117
  * **nkululeko.augment**: augment the current training data
118
+ * **nkululeko.predict**: predict a series of files with a given model
113
119
 
114
120
  Alternatively, there is a central "experiment" class that can be used by own experiments
115
121
 
@@ -217,6 +223,12 @@ Nkululeko can be used under the [MIT license](https://choosealicense.com/license
217
223
  Changelog
218
224
  =========
219
225
 
226
+ Version 0.58.0
227
+ --------------
228
+ * added dominance predict
229
+ * added MOS predict
230
+ * added PESQ predict
231
+
220
232
  Version 0.57.0
221
233
  --------------
222
234
  * renamed autopredict predict
@@ -7,7 +7,10 @@ setup.py
7
7
  nkululeko/__init__.py
8
8
  nkululeko/ap_age.py
9
9
  nkululeko/ap_arousal.py
10
+ nkululeko/ap_dominance.py
10
11
  nkululeko/ap_gender.py
12
+ nkululeko/ap_mos.py
13
+ nkululeko/ap_pesq.py
11
14
  nkululeko/ap_snr.py
12
15
  nkululeko/ap_valence.py
13
16
  nkululeko/augment.py
@@ -31,8 +34,10 @@ nkululeko/feats_audmodel_dim.py
31
34
  nkululeko/feats_clap.py
32
35
  nkululeko/feats_import.py
33
36
  nkululeko/feats_mld.py
37
+ nkululeko/feats_mos.py
34
38
  nkululeko/feats_opensmile.py
35
39
  nkululeko/feats_oxbow.py
40
+ nkululeko/feats_pesq.py
36
41
  nkululeko/feats_praat.py
37
42
  nkululeko/feats_snr.py
38
43
  nkululeko/feats_trill.py
@@ -1 +0,0 @@
1
- VERSION = '0.57.0'
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes