nkululeko 0.82.2__py3-none-any.whl → 0.82.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.82.2"
1
+ VERSION="0.82.4"
2
2
  SAMPLING_RATE = 16000
@@ -22,7 +22,16 @@ class Dataset_CSV(Dataset):
22
22
  # data_file = os.path.join(exp_root, data_file)
23
23
  root = os.path.dirname(data_file)
24
24
  audio_path = self.util.config_val_data(self.name, "audio_path", "")
25
- df = audformat.utils.read_csv(data_file)
25
+ df = pd.read_csv(data_file)
26
+ # special treatment for segmented dataframes with only one column:
27
+ if "start" in df.columns and len(df.columns) == 4:
28
+ index = audformat.segmented_index(
29
+ df.file.values, df.start.values, df.end.values
30
+ )
31
+ df = df.set_index(index)
32
+ df = df.drop(columns=["file", "start", "end"])
33
+ else:
34
+ df = audformat.utils.read_csv(data_file)
26
35
  if isinstance(df, pd.Series):
27
36
  df = df.to_frame()
28
37
  rename_cols = self.util.config_val_data(self.name, "colnames", False)
@@ -6,23 +6,26 @@
6
6
 
7
7
  import os
8
8
 
9
- import audeer
10
- import nkululeko.glob_conf as glob_conf
11
9
  import pandas as pd
12
10
  import torch
13
11
  import torchaudio
14
- from audformat.utils import map_file_path
15
- from nkululeko.feat_extract.featureset import Featureset
16
12
  from tqdm import tqdm
17
- from transformers import HubertModel, Wav2Vec2FeatureExtractor
13
+ from transformers import HubertModel
14
+ from transformers import Wav2Vec2FeatureExtractor
15
+
16
+ from nkululeko.feat_extract.featureset import Featureset
17
+ import nkululeko.glob_conf as glob_conf
18
18
 
19
19
 
20
20
  class Hubert(Featureset):
21
- """Class to extract HuBERT embedding)"""
21
+ """Class to extract HuBERT embedding)."""
22
22
 
23
23
  def __init__(self, name, data_df, feat_type):
24
- """Constructor. is_train is needed to distinguish from test/dev sets,
25
- because they use the codebook from the training"""
24
+ """Constructor.
25
+
26
+ Is_train is needed to distinguish from test/dev sets,
27
+ because they use the codebook from the training.
28
+ """
26
29
  super().__init__(name, data_df, feat_type)
27
30
  # check if device is not set, use cuda if available
28
31
  cuda = "cuda" if torch.cuda.is_available() else "cpu"
@@ -61,16 +64,12 @@ class Hubert(Featureset):
61
64
  """Extract the features or load them from disk if present."""
62
65
  store = self.util.get_path("store")
63
66
  storage = f"{store}{self.name}.pkl"
64
- extract = self.util.config_val(
65
- "FEATS", "needs_feature_extraction", False
66
- )
67
+ extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
67
68
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
68
69
  if extract or no_reuse or not os.path.isfile(storage):
69
70
  if not self.model_initialized:
70
71
  self.init_model()
71
- self.util.debug(
72
- "extracting Hubert embeddings, this might take a while..."
73
- )
72
+ self.util.debug("extracting Hubert embeddings, this might take a while...")
74
73
  emb_series = pd.Series(index=self.data_df.index, dtype=object)
75
74
  length = len(self.data_df.index)
76
75
  for idx, (file, start, end) in enumerate(
@@ -84,9 +83,7 @@ class Hubert(Featureset):
84
83
  assert sampling_rate == 16000
85
84
  emb = self.get_embeddings(signal, sampling_rate, file)
86
85
  emb_series.iloc[idx] = emb
87
- self.df = pd.DataFrame(
88
- emb_series.values.tolist(), index=self.data_df.index
89
- )
86
+ self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
90
87
  self.df.to_pickle(storage)
91
88
  try:
92
89
  glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
@@ -4,27 +4,32 @@
4
4
 
5
5
  import os
6
6
 
7
- import nkululeko.glob_conf as glob_conf
8
7
  import pandas as pd
9
8
  import torch
10
9
  import torchaudio
11
- from nkululeko.feat_extract.featureset import Featureset
12
10
  from tqdm import tqdm
13
- from transformers import Wav2Vec2FeatureExtractor, WavLMModel
11
+ from transformers import Wav2Vec2FeatureExtractor
12
+ from transformers import WavLMModel
13
+
14
+ from nkululeko.feat_extract.featureset import Featureset
15
+ import nkululeko.glob_conf as glob_conf
14
16
 
15
17
 
16
18
  class Wavlm(Featureset):
17
- """Class to extract WavLM embedding)"""
19
+ """Class to extract WavLM embedding)."""
20
+
21
+ def __init__(self, name, data_df, feats_type):
22
+ """Constructor.
18
23
 
19
- def __init__(self, name, data_df, feat_type):
20
- """Constructor. is_train is needed to distinguish from test/dev sets,
21
- because they use the codebook from the training"""
22
- super().__init__(name, data_df)
24
+ Is_train is needed to distinguish from test/dev sets,
25
+ because they use the codebook from the training.
26
+ """
27
+ super().__init__(name, data_df, feats_type)
23
28
  # check if device is not set, use cuda if available
24
29
  cuda = "cuda" if torch.cuda.is_available() else "cpu"
25
30
  self.device = self.util.config_val("MODEL", "device", cuda)
26
31
  self.model_initialized = False
27
- self.feat_type = feat_type
32
+ self.feat_type = feats_type
28
33
 
29
34
  def init_model(self):
30
35
  # load model
@@ -59,7 +64,9 @@ class Wavlm(Featureset):
59
64
  frame_offset=int(start.total_seconds() * 16000),
60
65
  num_frames=int((end - start).total_seconds() * 16000),
61
66
  )
62
- assert sampling_rate == 16000, f"sampling rate should be 16000 but is {sampling_rate}"
67
+ assert (
68
+ sampling_rate == 16000
69
+ ), f"sampling rate should be 16000 but is {sampling_rate}"
63
70
  emb = self.get_embeddings(signal, sampling_rate, file)
64
71
  emb_series.iloc[idx] = emb
65
72
  self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.82.2
3
+ Version: 0.82.4
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -333,6 +333,14 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
333
333
  Changelog
334
334
  =========
335
335
 
336
+ Version 0.82.4
337
+ --------------
338
+ * fixed bug in wavlm
339
+
340
+ Version 0.82.3
341
+ --------------
342
+ * fixed another audformat peculiarity to interpret time values as nanoseconds
343
+
336
344
  Version 0.82.2
337
345
  --------------
338
346
  * fixed audformat peculiarity that dataframes can have only one column
@@ -2,7 +2,7 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
3
3
  nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
4
4
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
5
- nkululeko/constants.py,sha256=gPbZOExm1t31PbhB4n-QVZ_gqlY3LMOkTLs8QP0Uucg,39
5
+ nkululeko/constants.py,sha256=kp-581CniawYq15MP-eOYf2iEPJCoCdhJ8W3kRUgPhs,39
6
6
  nkululeko/demo.py,sha256=55kNFA2helMhOxD4yZuKg1JWDtlUUpxm-6uAnroIydI,3264
7
7
  nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
8
8
  nkululeko/demo_predictor.py,sha256=-ggSHc3DXxRzjzcGB4qFBOMvKsfUdTkkde50BDrS9dA,4755
@@ -46,7 +46,7 @@ nkululeko/autopredict/ap_valence.py,sha256=n-hctRKySzhmJtowuMOTUu0T_ld3uK5pnfOzW
46
46
  nkululeko/autopredict/estimate_snr.py,sha256=S-bpS0xFkwWc4Ch75UrjbS8y538lQ0U3g_iLRFXureY,5048
47
47
  nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
48
  nkululeko/data/dataset.py,sha256=JGzMD6HIvkFkYBekmbmslIKc5ADaCj06T-8gpqH_kFo,27650
49
- nkululeko/data/dataset_csv.py,sha256=AinRsdR_WTaZ9emhHXzDgjWvQJqBYzFnqD8IVHKB4TQ,3476
49
+ nkululeko/data/dataset_csv.py,sha256=uLa7jW4w2ft299NkpXZMD361kPHF8oSYoIZ_ucxhuOM,3884
50
50
  nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
52
52
  nkululeko/feat_extract/feats_agender_agender.py,sha256=5dA7YA-YGxODovMC7ynMk3bnpPjfs0ApvSfjqvoSZY0,3346
@@ -54,7 +54,7 @@ nkululeko/feat_extract/feats_analyser.py,sha256=_5oz4y-NZCEBgfNP2GZ9WNqQR50Hbykm
54
54
  nkululeko/feat_extract/feats_auddim.py,sha256=VlzKKXTXa5kjLgQBWyEFy-daIyU1SkOwCCOIhKsWCvE,3162
55
55
  nkululeko/feat_extract/feats_audmodel.py,sha256=VjBNgAoxsHJhwr6Kwt9CxX6SaCM4RK_OV-GU2W5-bhU,3187
56
56
  nkululeko/feat_extract/feats_clap.py,sha256=nR6eEIRdsMHcfmD1bNtt5WfDvkxKjvEbukSSrXHm-HU,3489
57
- nkululeko/feat_extract/feats_hubert.py,sha256=ebj5PJtj-DcMudtnBWeY3_d_9pPFeEDEtP6NMDXIZNI,5289
57
+ nkululeko/feat_extract/feats_hubert.py,sha256=cLoUzSLjSYBkQnftjacSL7ES3O7Ysh_KrPYvZtLX_TU,5196
58
58
  nkululeko/feat_extract/feats_import.py,sha256=rj1p8lz19tCAC8hLzzZAwZ0M6gzwH3BzfabFUgal0yw,1622
59
59
  nkululeko/feat_extract/feats_mld.py,sha256=Vvu7GZOkn7Vda8eIOXqHjg78zegkFe3vTUaCXyVM0eA,2021
60
60
  nkululeko/feat_extract/feats_mos.py,sha256=KXNt7QYEfxkvr6UyVhig2aWQBaIvovlrR4gPuP03gmo,4174
@@ -67,7 +67,7 @@ nkululeko/feat_extract/feats_spkrec.py,sha256=VK4ma3uWzM0YZStsgRTirfkbzjWIfRWSgs
67
67
  nkululeko/feat_extract/feats_squim.py,sha256=jToXiwRq5-MQheAP6xczvry1uVIHYUrD8bM7Wb1cnqM,4528
68
68
  nkululeko/feat_extract/feats_trill.py,sha256=HXQBaPWTX0iNEjBY7RD8uyFeYjDieHqv8ZilE0Jb-Pg,3319
69
69
  nkululeko/feat_extract/feats_wav2vec2.py,sha256=tFGe4t6MIVxTDQYR8geNCtZ_3ALc-gpi-rmQbF74HfI,5285
70
- nkululeko/feat_extract/feats_wavlm.py,sha256=8afzqZgHwDRrlHh4y5jnop4objURpXU_IrfiK6orsew,4604
70
+ nkululeko/feat_extract/feats_wavlm.py,sha256=ulxpGjifUFx2ZgGmY32SmBJGIuvkYHoLb2n1LZ8KMwA,4703
71
71
  nkululeko/feat_extract/feats_whisper.py,sha256=0N7Vj65OVi2PNoB_NrDjWT5lP6xZNKxFOZZIoxkJvcA,4533
72
72
  nkululeko/feat_extract/featureset.py,sha256=HtgW2389rmlRAgFP3F1sSFzq2_iUVr2NhOfIXG9omt0,1448
73
73
  nkululeko/feat_extract/feinberg_praat.py,sha256=EP9pMALjlKdiYInLQdrZ7MmE499Mq-ISRCgqbqL3Rxc,21304
@@ -104,8 +104,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
104
104
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
105
105
  nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
106
106
  nkululeko/utils/util.py,sha256=_Z6OMJ3f-8TdETW9eqJYY5hwNRS5XCt9azzRnqoTTZE,12330
107
- nkululeko-0.82.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
108
- nkululeko-0.82.2.dist-info/METADATA,sha256=MasMoxHlcUmYgyaDu1CyNrmnh4vUVJVb0J6EqpV0ta0,35787
109
- nkululeko-0.82.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
110
- nkululeko-0.82.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
111
- nkululeko-0.82.2.dist-info/RECORD,,
107
+ nkululeko-0.82.4.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
108
+ nkululeko-0.82.4.dist-info/METADATA,sha256=iVA5K6coH1oLy918OgAZoq6wqFXcfYIcuZBV1dOnI2g,35950
109
+ nkululeko-0.82.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
110
+ nkululeko-0.82.4.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
111
+ nkululeko-0.82.4.dist-info/RECORD,,