nkululeko 0.81.6__py3-none-any.whl → 0.81.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.81.6"
1
+ VERSION="0.81.7"
2
2
  SAMPLING_RATE = 16000
@@ -9,16 +9,17 @@ import numpy as np
9
9
  import audinterface
10
10
 
11
11
 
12
- class AudModelAgenderSet(Featureset):
12
+ class AgenderSet(Featureset):
13
13
  """
14
14
  Embeddings from the wav2vec2. based model finetuned on agender data, described in the paper
15
15
  "Speech-based Age and Gender Prediction with Transformers"
16
16
  https://arxiv.org/abs/2306.16962
17
17
  """
18
18
 
19
- def __init__(self, name, data_df):
20
- super().__init__(name, data_df)
19
+ def __init__(self, name, data_df, feats_type):
20
+ super().__init__(name, data_df, feats_type)
21
21
  self.model_loaded = False
22
+ self.feats_type = feats_type
22
23
 
23
24
  def _load_model(self):
24
25
  model_url = "https://zenodo.org/record/7761387/files/w2v2-L-robust-6-age-gender.25c844af-1.1.1.zip"
@@ -28,7 +29,8 @@ class AudModelAgenderSet(Featureset):
28
29
  if not os.path.isdir(model_root):
29
30
  cache_root = audeer.mkdir("cache")
30
31
  model_root = audeer.mkdir(model_root)
31
- archive_path = audeer.download_url(model_url, cache_root, verbose=True)
32
+ archive_path = audeer.download_url(
33
+ model_url, cache_root, verbose=True)
32
34
  audeer.extract_archive(archive_path, model_root)
33
35
  device = self.util.config_val("MODEL", "device", "cpu")
34
36
  self.model = audonnx.load(model_root, device=device)
@@ -21,9 +21,10 @@ class AuddimSet(Featureset):
21
21
  https://arxiv.org/abs/2203.07378.
22
22
  """
23
23
 
24
- def __init__(self, name, data_df):
25
- super().__init__(name, data_df)
24
+ def __init__(self, name, data_df, feats_type):
25
+ super().__init__(name, data_df, feats_type)
26
26
  self.model_loaded = False
27
+ self.feats_types = feats_type
27
28
 
28
29
  def _load_model(self):
29
30
  model_url = "https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip"
@@ -31,7 +32,8 @@ class AuddimSet(Featureset):
31
32
  if not os.path.isdir(model_root):
32
33
  cache_root = audeer.mkdir("cache")
33
34
  model_root = audeer.mkdir(model_root)
34
- archive_path = audeer.download_url(model_url, cache_root, verbose=True)
35
+ archive_path = audeer.download_url(
36
+ model_url, cache_root, verbose=True)
35
37
  audeer.extract_archive(archive_path, model_root)
36
38
  cuda = "cuda" if torch.cuda.is_available() else "cpu"
37
39
  device = self.util.config_val("MODEL", "device", cuda)
@@ -19,9 +19,10 @@ class AudmodelSet(Featureset):
19
19
  https://arxiv.org/abs/2203.07378.
20
20
  """
21
21
 
22
- def __init__(self, name, data_df):
23
- super().__init__(name, data_df)
22
+ def __init__(self, name, data_df, feats_type):
23
+ super().__init__(name, data_df, feats_type)
24
24
  self.model_loaded = False
25
+ self.feats_type = feats_type
25
26
 
26
27
  def _load_model(self):
27
28
  model_url = "https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip"
@@ -29,7 +30,8 @@ class AudmodelSet(Featureset):
29
30
  if not os.path.isdir(model_root):
30
31
  cache_root = audeer.mkdir("cache")
31
32
  model_root = audeer.mkdir(model_root)
32
- archive_path = audeer.download_url(model_url, cache_root, verbose=True)
33
+ archive_path = audeer.download_url(
34
+ model_url, cache_root, verbose=True)
33
35
  audeer.extract_archive(archive_path, model_root)
34
36
  cuda = "cuda" if torch.cuda.is_available() else "cpu"
35
37
  device = self.util.config_val("MODEL", "device", cuda)
@@ -11,14 +11,15 @@ import laion_clap
11
11
  import audiofile
12
12
 
13
13
 
14
- class Clap(Featureset):
14
+ class ClapSet(Featureset):
15
15
  """Class to extract laion's clap embeddings (https://github.com/LAION-AI/CLAP)"""
16
16
 
17
- def __init__(self, name, data_df):
17
+ def __init__(self, name, data_df, feats_type):
18
18
  """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
19
- super().__init__(name, data_df)
19
+ super().__init__(name, data_df, feats_type)
20
20
  self.device = self.util.config_val("MODEL", "device", "cpu")
21
21
  self.model_initialized = False
22
+ self.feat_type = feats_type
22
23
 
23
24
  def init_model(self):
24
25
  # load model
@@ -32,12 +33,14 @@ class Clap(Featureset):
32
33
  store = self.util.get_path("store")
33
34
  store_format = self.util.config_val("FEATS", "store_format", "pkl")
34
35
  storage = f"{store}{self.name}.{store_format}"
35
- extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
36
+ extract = self.util.config_val(
37
+ "FEATS", "needs_feature_extraction", False)
36
38
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
37
39
  if extract or no_reuse or not os.path.isfile(storage):
38
40
  if not self.model_initialized:
39
41
  self.init_model()
40
- self.util.debug("extracting clap embeddings, this might take a while...")
42
+ self.util.debug(
43
+ "extracting clap embeddings, this might take a while...")
41
44
  emb_series = pd.Series(index=self.data_df.index, dtype=object)
42
45
  length = len(self.data_df.index)
43
46
  for idx, (file, start, end) in enumerate(
@@ -51,7 +54,8 @@ class Clap(Featureset):
51
54
  )
52
55
  emb = self.get_embeddings(signal, sampling_rate)
53
56
  emb_series[idx] = emb
54
- self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
57
+ self.df = pd.DataFrame(
58
+ emb_series.values.tolist(), index=self.data_df.index)
55
59
  self.util.write_store(self.df, storage, store_format)
56
60
  try:
57
61
  glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
@@ -1,6 +1,7 @@
1
1
  # feats_hubert.py
2
2
  # HuBERT feature extractor for Nkululeko
3
- # example feat_type = "hubert-large-ll60k", "hubert-xlarge-ll60k"
3
+ # example feat_type = "hubert-large-ll60k", "hubert-xlarge-ll60k",
4
+ # "hubert-base-ls960", hubert-large-ls960-ft", "hubert-xlarge-ls960-ft"
4
5
 
5
6
 
6
7
  import os
@@ -22,7 +23,7 @@ class Hubert(Featureset):
22
23
  def __init__(self, name, data_df, feat_type):
23
24
  """Constructor. is_train is needed to distinguish from test/dev sets,
24
25
  because they use the codebook from the training"""
25
- super().__init__(name, data_df)
26
+ super().__init__(name, data_df, feat_type)
26
27
  # check if device is not set, use cuda if available
27
28
  cuda = "cuda" if torch.cuda.is_available() else "cpu"
28
29
  self.device = self.util.config_val("MODEL", "device", cuda)
@@ -11,8 +11,8 @@ from nkululeko.feat_extract.featureset import Featureset
11
11
  class ImportSet(Featureset):
12
12
  """Class to import features that have been compiled elsewhere"""
13
13
 
14
- def __init__(self, name, data_df):
15
- super().__init__(name, data_df)
14
+ def __init__(self, name, data_df, feats_type):
15
+ super().__init__(name, data_df, feats_type)
16
16
 
17
17
  def extract(self):
18
18
  """Import the features."""
@@ -27,9 +27,9 @@ from nkululeko.feat_extract.featureset import Featureset
27
27
  class MosSet(Featureset):
28
28
  """Class to predict MOS (mean opinion score)"""
29
29
 
30
- def __init__(self, name, data_df):
30
+ def __init__(self, name, data_df, feats_type):
31
31
  """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
32
- super().__init__(name, data_df)
32
+ super().__init__(name, data_df, feats_type)
33
33
  self.device = self.util.config_val("MODEL", "device", "cpu")
34
34
  self.model_initialized = False
35
35
 
@@ -8,31 +8,21 @@ import opensmile
8
8
 
9
9
 
10
10
  class Opensmileset(Featureset):
11
- def __init__(self, name, data_df):
12
- super().__init__(name, data_df)
11
+ def __init__(self, name, data_df, feats_type=None, config_file=None):
12
+ super().__init__(name, data_df, feats_type)
13
13
  self.featset = self.util.config_val("FEATS", "set", "eGeMAPSv02")
14
14
  try:
15
15
  self.feature_set = eval(f"opensmile.FeatureSet.{self.featset}")
16
- #'eGeMAPSv02, ComParE_2016, GeMAPSv01a, eGeMAPSv01a':
16
+ # 'eGeMAPSv02, ComParE_2016, GeMAPSv01a, eGeMAPSv01a':
17
17
  except AttributeError:
18
- self.util.error(
19
- f"something is wrong with feature set: {self.featset}"
20
- )
18
+ self.util.error(f"something is wrong with feature set: {self.featset}")
21
19
  self.featlevel = self.util.config_val("FEATS", "level", "functionals")
22
20
  try:
23
- self.featlevel = self.featlevel.replace(
24
- "lld", "LowLevelDescriptors"
25
- )
26
- self.featlevel = self.featlevel.replace(
27
- "functionals", "Functionals"
28
- )
29
- self.feature_level = eval(
30
- f"opensmile.FeatureLevel.{self.featlevel}"
31
- )
21
+ self.featlevel = self.featlevel.replace("lld", "LowLevelDescriptors")
22
+ self.featlevel = self.featlevel.replace("functionals", "Functionals")
23
+ self.feature_level = eval(f"opensmile.FeatureLevel.{self.featlevel}")
32
24
  except AttributeError:
33
- self.util.error(
34
- f"something is wrong with feature level: {self.featlevel}"
35
- )
25
+ self.util.error(f"something is wrong with feature level: {self.featlevel}")
36
26
 
37
27
  def extract(self):
38
28
  """Extract the features based on the initialized dataset or re-open them when found on disk."""
@@ -44,9 +34,7 @@ class Opensmileset(Featureset):
44
34
  )
45
35
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
46
36
  if extract or not os.path.isfile(storage) or no_reuse:
47
- self.util.debug(
48
- "extracting openSmile features, this might take a while..."
49
- )
37
+ self.util.debug("extracting openSmile features, this might take a while...")
50
38
  smile = opensmile.Smile(
51
39
  feature_set=self.feature_set,
52
40
  feature_level=self.feature_level,
@@ -85,9 +73,7 @@ class Opensmileset(Featureset):
85
73
  selected_features = ast.literal_eval(
86
74
  glob_conf.config["FEATS"]["os.features"]
87
75
  )
88
- self.util.debug(
89
- f"selecting features from opensmile: {selected_features}"
90
- )
76
+ self.util.debug(f"selecting features from opensmile: {selected_features}")
91
77
  sel_feats_df = pd.DataFrame()
92
78
  hit = False
93
79
  for feat in selected_features:
@@ -10,9 +10,10 @@ import opensmile
10
10
  class Openxbow(Featureset):
11
11
  """Class to extract openXBOW processed opensmile features (https://github.com/openXBOW)"""
12
12
 
13
- def __init__(self, name, data_df, is_train=False):
13
+ def __init__(self, name, data_df, feats_type, is_train=False):
14
14
  """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
15
- super().__init__(name, data_df)
15
+ super().__init__(name, data_df, feats_type)
16
+ self.feats_types = feats_type
16
17
  self.is_train = is_train
17
18
 
18
19
  def extract(self):
@@ -21,11 +22,13 @@ class Openxbow(Featureset):
21
22
  self.feature_set = eval(f"opensmile.FeatureSet.{self.featset}")
22
23
  store = self.util.get_path("store")
23
24
  storage = f"{store}{self.name}_{self.featset}.pkl"
24
- extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
25
+ extract = self.util.config_val(
26
+ "FEATS", "needs_feature_extraction", False)
25
27
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
26
28
  if extract or no_reuse or not os.path.isfile(storage):
27
29
  # extract smile features first
28
- self.util.debug("extracting openSmile features, this might take a while...")
30
+ self.util.debug(
31
+ "extracting openSmile features, this might take a while...")
29
32
  smile = opensmile.Smile(
30
33
  feature_set=self.feature_set,
31
34
  feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
@@ -48,7 +51,13 @@ class Openxbow(Featureset):
48
51
  # save the smile features
49
52
  smile_df.to_csv(lld_name, sep=";", header=False)
50
53
  # get the path of the xbow java jar file
51
- xbow_path = self.util.config_val("FEATS", "xbow.model", "../openXBOW/")
54
+ xbow_path = self.util.config_val(
55
+ "FEATS", "xbow.model", "openXBOW")
56
+ # check if JAR file exist
57
+ if not os.path.isfile(f"{xbow_path}/openXBOW.jar"):
58
+ # download using wget if not exist and locate in xbow_path
59
+ os.system(
60
+ f"git clone https://github.com/openXBOW/openXBOW")
52
61
  # get the size of the codebook
53
62
  size = self.util.config_val("FEATS", "size", 500)
54
63
  # get the number of assignements
@@ -57,16 +66,12 @@ class Openxbow(Featureset):
57
66
  if self.is_train:
58
67
  # store the codebook
59
68
  os.system(
60
- f"java -jar {xbow_path}openXBOW.jar -i"
61
- f" {lld_name} -standardizeInput -log -o"
62
- f" {xbow_name} -size {size} -a {assignments} -B"
63
- f" {codebook_name}"
69
+ f"java -jar {xbow_path}/openXBOW.jar -i {lld_name} -standardizeInput -log -o {xbow_name} -size {size} -a {assignments} -B {codebook_name}"
64
70
  )
65
71
  else:
66
72
  # use the codebook
67
73
  os.system(
68
- f"java -jar {xbow_path}openXBOW.jar -i {lld_name} "
69
- f" -o {xbow_name} -b {codebook_name}"
74
+ f"java -jar {xbow_path}/openXBOW.jar -i {lld_name} -o {xbow_name} -b {codebook_name}"
70
75
  )
71
76
  # read in the result from disk
72
77
  xbow_df = pd.read_csv(xbow_name, sep=";", header=None)
@@ -18,18 +18,20 @@ class PraatSet(Featureset):
18
18
 
19
19
  """
20
20
 
21
- def __init__(self, name, data_df):
22
- super().__init__(name, data_df)
21
+ def __init__(self, name, data_df, feats_type):
22
+ super().__init__(name, data_df, feats_type)
23
23
 
24
24
  def extract(self):
25
25
  """Extract the features based on the initialized dataset or re-open them when found on disk."""
26
26
  store = self.util.get_path("store")
27
27
  store_format = self.util.config_val("FEATS", "store_format", "pkl")
28
28
  storage = f"{store}{self.name}.{store_format}"
29
- extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
29
+ extract = self.util.config_val(
30
+ "FEATS", "needs_feature_extraction", False)
30
31
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
31
32
  if extract or no_reuse or not os.path.isfile(storage):
32
- self.util.debug("extracting Praat features, this might take a while...")
33
+ self.util.debug(
34
+ "extracting Praat features, this might take a while...")
33
35
  self.df = feinberg_praat.compute_features(self.data_df.index)
34
36
  self.df = self.df.set_index(self.data_df.index)
35
37
  for i, col in enumerate(self.df.columns):
@@ -52,7 +54,8 @@ class PraatSet(Featureset):
52
54
  self.df = self.df.astype(float)
53
55
 
54
56
  def extract_sample(self, signal, sr):
55
- import audiofile, audformat
57
+ import audiofile
58
+ import audformat
56
59
 
57
60
  tmp_audio_names = ["praat_audio_tmp.wav"]
58
61
  audiofile.write(tmp_audio_names[0], signal, sr)
@@ -4,6 +4,7 @@ feats_spectra.py
4
4
  Inspired by code from Su Lei
5
5
 
6
6
  """
7
+
7
8
  import os
8
9
  import torchaudio
9
10
  import torchaudio.transforms as T
@@ -23,9 +24,9 @@ import nkululeko.glob_conf as glob_conf
23
24
 
24
25
 
25
26
  class Spectraloader(Featureset):
26
- def __init__(self, name, data_df):
27
+ def __init__(self, name, data_df, feat_type):
27
28
  """Constructor setting the name"""
28
- Featureset.__init__(self, name, data_df)
29
+ super().__init__(name, data_df, feat_type)
29
30
  self.sampling_rate = SAMPLING_RATE
30
31
  self.num_bands = int(self.util.config_val("FEATS", "fft_nbands", "64"))
31
32
  self.win_dur = int(self.util.config_val("FEATS", "fft_win_dur", "25"))
@@ -30,9 +30,9 @@ from nkululeko.utils.util import Util
30
30
  class SquimSet(Featureset):
31
31
  """Class to predict SQUIM features"""
32
32
 
33
- def __init__(self, name, data_df):
33
+ def __init__(self, name, data_df, feats_type):
34
34
  """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
35
- super().__init__(name, data_df)
35
+ super().__init__(name, data_df, feats_type)
36
36
  self.device = self.util.config_val("MODEL", "device", "cpu")
37
37
  self.model_initialized = False
38
38
 
@@ -1,4 +1,5 @@
1
1
  # feats_trill.py
2
+ import tensorflow_hub as hub
2
3
  import os
3
4
  import tensorflow as tf
4
5
  from numpy.core.numeric import tensordot
@@ -11,7 +12,6 @@ from nkululeko.feat_extract.featureset import Featureset
11
12
 
12
13
  # Import TF 2.X and make sure we're running eager.
13
14
  assert tf.executing_eagerly()
14
- import tensorflow_hub as hub
15
15
 
16
16
 
17
17
  class TRILLset(Featureset):
@@ -20,7 +20,7 @@ class TRILLset(Featureset):
20
20
  """https://ai.googleblog.com/2020/06/improving-speech-representations-and.html"""
21
21
 
22
22
  # Initialization of the class
23
- def __init__(self, name, data_df):
23
+ def __init__(self, name, data_df, feats_type):
24
24
  """
25
25
  Initialize the class with name, data and Util instance
26
26
  Also loads the model from hub
@@ -31,7 +31,7 @@ class TRILLset(Featureset):
31
31
  :type data_df: DataFrame
32
32
  :return: None
33
33
  """
34
- super().__init__(name, data_df)
34
+ super().__init__(name, data_df, feats_type)
35
35
  # Load the model from the configured path
36
36
  model_path = self.util.config_val(
37
37
  "FEATS",
@@ -39,20 +39,24 @@ class TRILLset(Featureset):
39
39
  "https://tfhub.dev/google/nonsemantic-speech-benchmark/trill/3",
40
40
  )
41
41
  self.module = hub.load(model_path)
42
+ self.feats_type = feats_type
42
43
 
43
44
  def extract(self):
44
45
  store = self.util.get_path("store")
45
46
  storage = f"{store}{self.name}.pkl"
46
- extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
47
+ extract = self.util.config_val(
48
+ "FEATS", "needs_feature_extraction", False)
47
49
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
48
50
  if extract or no_reuse or not os.path.isfile(storage):
49
- self.util.debug("extracting TRILL embeddings, this might take a while...")
51
+ self.util.debug(
52
+ "extracting TRILL embeddings, this might take a while...")
50
53
  emb_series = pd.Series(index=self.data_df.index, dtype=object)
51
54
  length = len(self.data_df.index)
52
55
  for idx, file in enumerate(tqdm(self.data_df.index.get_level_values(0))):
53
56
  emb = self.getEmbeddings(file)
54
57
  emb_series[idx] = emb
55
- self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
58
+ self.df = pd.DataFrame(
59
+ emb_series.values.tolist(), index=self.data_df.index)
56
60
  self.df.to_pickle(storage)
57
61
  try:
58
62
  glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
@@ -1,5 +1,11 @@
1
- # feats_wav2vec2.py
2
- # feat_types example = wav2vec2-large-robust-ft-swbd-300h
1
+ """ feats_wav2vec2.py
2
+ feat_types example = [wav2vec2-large-robust-ft-swbd-300h,
3
+ wav2vec2-xls-r-2b, wav2vec2-large, wav2vec2-large-xlsr-53, wav2vec2-base]
4
+
5
+ Complete list: https://huggingface.co/facebook?search_models=wav2vec2
6
+ Currently only supports wav2vec2
7
+ """
8
+
3
9
  import os
4
10
  from tqdm import tqdm
5
11
  import pandas as pd
@@ -16,11 +22,11 @@ class Wav2vec2(Featureset):
16
22
 
17
23
  def __init__(self, name, data_df, feat_type):
18
24
  """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
19
- super().__init__(name, data_df)
25
+ super().__init__(name, data_df, feat_type)
20
26
  cuda = "cuda" if torch.cuda.is_available() else "cpu"
21
27
  self.device = self.util.config_val("MODEL", "device", cuda)
22
28
  self.model_initialized = False
23
- if feat_type == "wav2vec" or feat_type == "wav2vec2":
29
+ if feat_type == "wav2vec2":
24
30
  self.feat_type = "wav2vec2-large-robust-ft-swbd-300h"
25
31
  else:
26
32
  self.feat_type = feat_type
@@ -33,7 +39,8 @@ class Wav2vec2(Featureset):
33
39
  )
34
40
  config = transformers.AutoConfig.from_pretrained(model_path)
35
41
  layer_num = config.num_hidden_layers
36
- hidden_layer = int(self.util.config_val("FEATS", "wav2vec2.layer", "0"))
42
+ hidden_layer = int(self.util.config_val(
43
+ "FEATS", "wav2vec2.layer", "0"))
37
44
  config.num_hidden_layers = layer_num - hidden_layer
38
45
  self.util.debug(f"using hidden layer #{config.num_hidden_layers}")
39
46
  self.processor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
@@ -48,7 +55,8 @@ class Wav2vec2(Featureset):
48
55
  """Extract the features or load them from disk if present."""
49
56
  store = self.util.get_path("store")
50
57
  storage = f"{store}{self.name}.pkl"
51
- extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
58
+ extract = self.util.config_val(
59
+ "FEATS", "needs_feature_extraction", False)
52
60
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
53
61
  if extract or no_reuse or not os.path.isfile(storage):
54
62
  if not self.model_initialized:
@@ -69,7 +77,8 @@ class Wav2vec2(Featureset):
69
77
  emb = self.get_embeddings(signal, sampling_rate, file)
70
78
  emb_series[idx] = emb
71
79
  # print(f"emb_series shape: {emb_series.shape}")
72
- self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
80
+ self.df = pd.DataFrame(
81
+ emb_series.values.tolist(), index=self.data_df.index)
73
82
  # print(f"df shape: {self.df.shape}")
74
83
  self.df.to_pickle(storage)
75
84
  try:
@@ -59,10 +59,7 @@ class Wavlm(Featureset):
59
59
  frame_offset=int(start.total_seconds() * 16000),
60
60
  num_frames=int((end - start).total_seconds() * 16000),
61
61
  )
62
- if sampling_rate != 16000:
63
- self.util.error(
64
- f"sampling rate should be 16000 but is {sampling_rate}"
65
- )
62
+ assert sampling_rate == 16000, f"sampling rate should be 16000 but is {sampling_rate}"
66
63
  emb = self.get_embeddings(signal, sampling_rate, file)
67
64
  emb_series.iloc[idx] = emb
68
65
  self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
@@ -0,0 +1,110 @@
1
+ # feats_whisper.py
2
+ import os
3
+
4
+ import pandas as pd
5
+ import torch
6
+ from transformers import AutoFeatureExtractor
7
+ from transformers import WhisperModel
8
+
9
+ import audeer
10
+ import audiofile
11
+
12
+ from nkululeko.feat_extract.featureset import Featureset
13
+ import nkululeko.glob_conf as glob_conf
14
+
15
+
16
+ class Whisper(Featureset):
17
+ """Class to extract whisper embeddings."""
18
+
19
+ def __init__(self, name, data_df, feat_type):
20
+ super().__init__(name, data_df, feat_type)
21
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
22
+ self.device = self.util.config_val("MODEL", "device", cuda)
23
+ self.model_initialized = False
24
+ if feat_type == "whisper":
25
+ self.feat_type = "whisper-base"
26
+ else:
27
+ self.feat_type = feat_type
28
+
29
+ def init_model(self):
30
+ # load model
31
+ self.util.debug("loading whisper model...")
32
+ model_name = f"openai/{self.feat_type}"
33
+ self.model = WhisperModel.from_pretrained(model_name).to(self.device)
34
+ print(f"intialized Whisper model on {self.device}")
35
+ self.feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
36
+ self.model_initialized = True
37
+
38
+ def extract(self):
39
+ """Extract the features or load them from disk if present."""
40
+ store = self.util.get_path("store")
41
+ storage = f"{store}{self.name}.pkl"
42
+ extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
43
+ no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
44
+ if extract or no_reuse or not os.path.isfile(storage):
45
+ if not self.model_initialized:
46
+ self.init_model()
47
+ self.util.debug("extracting whisper embeddings, this might take a while...")
48
+ emb_series = []
49
+ for (file, start, end), _ in audeer.progress_bar(
50
+ self.data_df.iterrows(),
51
+ total=len(self.data_df),
52
+ desc=f"Running whisper on {len(self.data_df)} audiofiles",
53
+ ):
54
+ if end == pd.NaT:
55
+ signal, sr = audiofile.read(file, offset=start)
56
+ else:
57
+ signal, sr = audiofile.read(
58
+ file, duration=end - start, offset=start
59
+ )
60
+ emb = self.get_embeddings(signal, sr, file)
61
+ emb_series.append(emb)
62
+ # print(f"emb_series shape: {emb_series.shape}")
63
+ self.df = pd.DataFrame(emb_series, index=self.data_df.index)
64
+ # print(f"df shape: {self.df.shape}")
65
+ self.df.to_pickle(storage)
66
+ try:
67
+ glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
68
+ except KeyError:
69
+ pass
70
+ else:
71
+ self.util.debug("reusing extracted wav2vec2 embeddings")
72
+ self.df = pd.read_pickle(storage)
73
+ if self.df.isnull().values.any():
74
+ nanrows = self.df.columns[self.df.isna().any()].tolist()
75
+ # print(nanrows)
76
+ self.util.error(
77
+ f"got nan: {self.df.shape} {self.df.isnull().sum().sum()}"
78
+ )
79
+
80
+ def get_embeddings(self, signal, sampling_rate, file):
81
+ r"""Extract embeddings from raw audio signal."""
82
+ try:
83
+ with torch.no_grad():
84
+ embed_size = self.model.config.hidden_size
85
+ embed_columns = [f"whisper_{i}" for i in range(embed_size)]
86
+ inputs = self.feature_extractor(signal, sampling_rate=16000)[
87
+ "input_features"
88
+ ][0]
89
+ inputs = torch.from_numpy(inputs).to(self.device).unsqueeze(0)
90
+ decoder_input_ids = (
91
+ torch.tensor([[1, 1]]).to(self.device)
92
+ * self.model.config.decoder_start_token_id
93
+ )
94
+ full_outputs = self.model(
95
+ inputs,
96
+ decoder_input_ids=decoder_input_ids,
97
+ output_hidden_states=True,
98
+ )
99
+ outputs = full_outputs.encoder_last_hidden_state[0]
100
+ average_embeds = outputs.squeeze().mean(axis=0).cpu().detach().numpy()
101
+ except RuntimeError as re:
102
+ print(str(re))
103
+ self.util.error(f"couldn't extract file: {file}")
104
+ # print(f"y flattened shape: {y.ravel().shape}")
105
+ return average_embeds
106
+
107
+ def extract_sample(self, signal, sr):
108
+ self.init_model()
109
+ feats = self.get_embeddings(signal, sr, "no file")
110
+ return feats
@@ -7,13 +7,15 @@ import ast
7
7
 
8
8
  class Featureset:
9
9
  name = "" # designation
10
- df = None # pandas dataframe to store the features (and indexed with the data from the sets)
10
+ df = None # pandas dataframe to store the features
11
+ # (and indexed with the data from the sets)
11
12
  data_df = None # dataframe to get audio paths
12
13
 
13
- def __init__(self, name, data_df):
14
+ def __init__(self, name, data_df, feats_type):
14
15
  self.name = name
15
16
  self.data_df = data_df
16
17
  self.util = Util("featureset")
18
+ self.feats_types = feats_type
17
19
 
18
20
  def extract(self):
19
21
  pass
@@ -23,7 +25,8 @@ class Featureset:
23
25
  self.df = self.df[self.df.index.isin(self.data_df.index)]
24
26
  try:
25
27
  # use only some features
26
- selected_features = ast.literal_eval(glob_conf.config["FEATS"]["features"])
28
+ selected_features = ast.literal_eval(
29
+ glob_conf.config["FEATS"]["features"])
27
30
  self.util.debug(f"selecting features: {selected_features}")
28
31
  sel_feats_df = pd.DataFrame()
29
32
  hit = False
@@ -53,7 +53,7 @@ class FeatureExtractor:
53
53
  if feat_extractor_class is None:
54
54
  self.util.error(f"unknown feats_type: {feats_type}")
55
55
  return feat_extractor_class(
56
- f"{store_name}_{self.feats_designation}", self.data_df
56
+ f"{store_name}_{self.feats_designation}", self.data_df, feats_type
57
57
  )
58
58
 
59
59
  def _get_feat_extractor_class(self, feats_type):
@@ -61,16 +61,27 @@ class FeatureExtractor:
61
61
  from nkululeko.feat_extract.feats_opensmile import Opensmileset
62
62
 
63
63
  return Opensmileset
64
+
64
65
  elif feats_type == "spectra":
65
66
  from nkululeko.feat_extract.feats_spectra import Spectraloader
66
67
 
67
68
  return Spectraloader
69
+
68
70
  elif feats_type == "trill":
69
71
  from nkululeko.feat_extract.feats_trill import TRILLset
70
72
 
71
73
  return TRILLset
72
- elif feats_type.startswith(("wav2vec", "hubert", "wavlm", "spkrec")):
74
+
75
+ elif feats_type.startswith(
76
+ ("wav2vec2", "hubert", "wavlm", "spkrec", "whisper")
77
+ ):
73
78
  return self._get_feat_extractor_by_prefix(feats_type)
79
+
80
+ elif feats_type == "xbow":
81
+ from nkululeko.feat_extract.feats_oxbow import Openxbow
82
+
83
+ return Openxbow
84
+
74
85
  elif feats_type in (
75
86
  "audmodel",
76
87
  "auddim",
@@ -89,11 +100,11 @@ class FeatureExtractor:
89
100
  return None
90
101
 
91
102
  def _get_feat_extractor_by_prefix(self, feats_type):
92
- prefix, _, ext = feats_type.partition("_")
103
+ prefix, _, ext = feats_type.partition("-")
93
104
  from importlib import import_module
94
105
 
95
106
  module = import_module(f"nkululeko.feat_extract.feats_{prefix.lower()}")
96
- class_name = f"{prefix.capitalize()}{ext.capitalize()}set"
107
+ class_name = f"{prefix.capitalize()}"
97
108
  return getattr(module, class_name)
98
109
 
99
110
  def _get_feat_extractor_by_name(self, feats_type):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.81.6
3
+ Version: 0.81.7
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -323,6 +323,11 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
323
323
  Changelog
324
324
  =========
325
325
 
326
+ Version 0.81.7
327
+ --------------
328
+ * bugfixes
329
+ * added whisper feature extractor
330
+
326
331
  Version 0.81.6
327
332
  --------------
328
333
  * updated documentation
@@ -2,14 +2,14 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
3
3
  nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
4
4
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
5
- nkululeko/constants.py,sha256=QwuK_rtVCEaN-oZZnh3s104ROC_O7hpEGY_ZMDKtRcw,39
5
+ nkululeko/constants.py,sha256=7yZ6tYUvMMX3FdTsBGzuH-Hgw5ALAhmDCAiKRrOESM0,39
6
6
  nkululeko/demo.py,sha256=55kNFA2helMhOxD4yZuKg1JWDtlUUpxm-6uAnroIydI,3264
7
7
  nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
8
8
  nkululeko/demo_predictor.py,sha256=-ggSHc3DXxRzjzcGB4qFBOMvKsfUdTkkde50BDrS9dA,4755
9
9
  nkululeko/experiment.py,sha256=CSEvQxK2_tzJyND5sUHQSc6MkRp1g6EVam8JX8txqps,29576
10
10
  nkululeko/explore.py,sha256=2wdoGRqldvsN1zCiWk0quSDgHHHUoF2UZOWQ1r-2OLM,2310
11
11
  nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
12
- nkululeko/feature_extractor.py,sha256=4UIvfh0m54286Y8q28aYBy-ojTFi3bWiActwBeAg_yE,3814
12
+ nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzwM,3976
13
13
  nkululeko/file_checker.py,sha256=LoLnL8aHpW-axMQ46qbqrManTs5otG9ShpEZuz9iRSk,3474
14
14
  nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
15
15
  nkululeko/glob_conf.py,sha256=iHiVSxDYgmYwdx6z0HuGUMSWrfZfufPHxHb60q2dLRY,453
@@ -47,27 +47,28 @@ nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
47
  nkululeko/data/dataset.py,sha256=JGzMD6HIvkFkYBekmbmslIKc5ADaCj06T-8gpqH_kFo,27650
48
48
  nkululeko/data/dataset_csv.py,sha256=v3lSjF23EVjoP460QOfhdcqbWAlBQWlBOuaYujZoS4s,3407
49
49
  nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
- nkululeko/feat_extract/feats_agender.py,sha256=w13UsYsUTzMe5B2Rkg0sfvBXrVBBo0-Ljo532zkDXgM,3043
50
+ nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
51
51
  nkululeko/feat_extract/feats_agender_agender.py,sha256=5dA7YA-YGxODovMC7ynMk3bnpPjfs0ApvSfjqvoSZY0,3346
52
52
  nkululeko/feat_extract/feats_analyser.py,sha256=_5oz4y-NZCEBgfNP2GZ9WNqQR50Hbykm0TvDVomWP0U,11399
53
- nkululeko/feat_extract/feats_auddim.py,sha256=lMhKbEfnA0qKjRgMFx1xYpEhKCB8TZeFn8AJs_oqkvE,3083
54
- nkululeko/feat_extract/feats_audmodel.py,sha256=AO5BcrZ0QRD7--64WZq73KWvInM-8dVfgrLDQfq4sZ0,3109
55
- nkululeko/feat_extract/feats_clap.py,sha256=v82mbjdjGDSKUUBggttw7jW0oka22fWAmfUf-4VmaDU,3379
56
- nkululeko/feat_extract/feats_hubert.py,sha256=uL-9mgQHuGPQi1nuUaw6aNU9DscsO89uJAmBdmnCegM,5205
57
- nkululeko/feat_extract/feats_import.py,sha256=LMxFXSO2Ui7Jj2t9oBotqWe-je8rnDgkX4ay6StNrGk,1598
53
+ nkululeko/feat_extract/feats_auddim.py,sha256=VlzKKXTXa5kjLgQBWyEFy-daIyU1SkOwCCOIhKsWCvE,3162
54
+ nkululeko/feat_extract/feats_audmodel.py,sha256=VjBNgAoxsHJhwr6Kwt9CxX6SaCM4RK_OV-GU2W5-bhU,3187
55
+ nkululeko/feat_extract/feats_clap.py,sha256=nR6eEIRdsMHcfmD1bNtt5WfDvkxKjvEbukSSrXHm-HU,3489
56
+ nkululeko/feat_extract/feats_hubert.py,sha256=ebj5PJtj-DcMudtnBWeY3_d_9pPFeEDEtP6NMDXIZNI,5289
57
+ nkululeko/feat_extract/feats_import.py,sha256=rj1p8lz19tCAC8hLzzZAwZ0M6gzwH3BzfabFUgal0yw,1622
58
58
  nkululeko/feat_extract/feats_mld.py,sha256=Vvu7GZOkn7Vda8eIOXqHjg78zegkFe3vTUaCXyVM0eA,2021
59
- nkululeko/feat_extract/feats_mos.py,sha256=SgsEw6_niVTUNO1tj92eUHBxKOeIcSHpYJBuHFvbJY8,4150
60
- nkululeko/feat_extract/feats_opensmile.py,sha256=yDRGSiUQV3K3oLxVqq8Cxj5bkc-RiLzDYbAGKC9I5vc,4140
61
- nkululeko/feat_extract/feats_oxbow.py,sha256=7W26NbEJnSckZzedolsIW1PJPSdCHhuh8YM19kOxaMA,4734
62
- nkululeko/feat_extract/feats_praat.py,sha256=6VCEU264bV-1lTuwfn0oCbQJ96J2WbyOU724Opg2_Ms,3037
59
+ nkululeko/feat_extract/feats_mos.py,sha256=KXNt7QYEfxkvr6UyVhig2aWQBaIvovlrR4gPuP03gmo,4174
60
+ nkululeko/feat_extract/feats_opensmile.py,sha256=vLY8HCpeOj9NdJXzt_GVI3Vxwsjf9cEfcqJ3IHqlTQY,3978
61
+ nkululeko/feat_extract/feats_oxbow.py,sha256=CmIG9cbHTJTJVnzgCPdQpYpnlewWExpsr5ZcK8Malyo,4980
62
+ nkululeko/feat_extract/feats_praat.py,sha256=kZrS6srzH7WoWEd2prp1Dxw6g9JklFQGTNq5zzPpHzg,3105
63
63
  nkululeko/feat_extract/feats_snr.py,sha256=9dqZ-4RpK98iJEssM3ttozNd18LWlZYM_QVXvp5xDcs,2829
64
- nkululeko/feat_extract/feats_spectra.py,sha256=PLKoc_S3v3wibodUCiOnFFdF87U2rk2sfndRo2mmG64,3656
64
+ nkululeko/feat_extract/feats_spectra.py,sha256=5Pex8awIQC3cjQRHSu4NQFmg4quamG0RL3V3Yd0pJHs,3670
65
65
  nkululeko/feat_extract/feats_spkrec.py,sha256=VK4ma3uWzM0YZStsgRTirfkbzjWIfRWSgsYI038QlRY,4803
66
- nkululeko/feat_extract/feats_squim.py,sha256=dDsWlTfXJeUnUD5XSVw4cfuf3XJ-MHfBHxx3xyFR5mE,4504
67
- nkululeko/feat_extract/feats_trill.py,sha256=PpygJK_W6QoBNeSah9npQPiQlJxLWFn6TSOaZUYehNU,3211
68
- nkululeko/feat_extract/feats_wav2vec2.py,sha256=sFf-WkLUgKUQsFxGO9m2hS3uYoGkv95mZavCEZyWFGA,5072
69
- nkululeko/feat_extract/feats_wavlm.py,sha256=RhI0oWIsknnxTVmdnNS_xJO1NnUUR0CUNDWH1yTpNLk,4683
70
- nkululeko/feat_extract/featureset.py,sha256=-ynkdor8iX7BFx10aIbB3LfwxrrzPoBGz9kXwyAJO9M,1375
66
+ nkululeko/feat_extract/feats_squim.py,sha256=jToXiwRq5-MQheAP6xczvry1uVIHYUrD8bM7Wb1cnqM,4528
67
+ nkululeko/feat_extract/feats_trill.py,sha256=HXQBaPWTX0iNEjBY7RD8uyFeYjDieHqv8ZilE0Jb-Pg,3319
68
+ nkululeko/feat_extract/feats_wav2vec2.py,sha256=tFGe4t6MIVxTDQYR8geNCtZ_3ALc-gpi-rmQbF74HfI,5285
69
+ nkululeko/feat_extract/feats_wavlm.py,sha256=8afzqZgHwDRrlHh4y5jnop4objURpXU_IrfiK6orsew,4604
70
+ nkululeko/feat_extract/feats_whisper.py,sha256=0N7Vj65OVi2PNoB_NrDjWT5lP6xZNKxFOZZIoxkJvcA,4533
71
+ nkululeko/feat_extract/featureset.py,sha256=HtgW2389rmlRAgFP3F1sSFzq2_iUVr2NhOfIXG9omt0,1448
71
72
  nkululeko/feat_extract/feinberg_praat.py,sha256=EP9pMALjlKdiYInLQdrZ7MmE499Mq-ISRCgqbqL3Rxc,21304
72
73
  nkululeko/losses/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
73
74
  nkululeko/losses/loss_ccc.py,sha256=NOK0y0fxKUnU161B5geap6Fmn8QzoPl2MqtPiV8IuJE,976
@@ -102,8 +103,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
102
103
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
103
104
  nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
104
105
  nkululeko/utils/util.py,sha256=_Z6OMJ3f-8TdETW9eqJYY5hwNRS5XCt9azzRnqoTTZE,12330
105
- nkululeko-0.81.6.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
106
- nkululeko-0.81.6.dist-info/METADATA,sha256=zYBbwBbVfPQFWfN8yjyjNGYmI6GMYsyeRoeb8FfS3gs,34905
107
- nkululeko-0.81.6.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
108
- nkululeko-0.81.6.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
109
- nkululeko-0.81.6.dist-info/RECORD,,
106
+ nkululeko-0.81.7.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
107
+ nkululeko-0.81.7.dist-info/METADATA,sha256=7P8gRtSvPadRGBsWRhT34-Xj8jwkbL7OcLJ__AGtoQs,34981
108
+ nkululeko-0.81.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
109
+ nkululeko-0.81.7.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
110
+ nkululeko-0.81.7.dist-info/RECORD,,