PyPI - nkululeko - Versions diffs - 0.82.2__py3-none-any.whl → 0.82.4__py3-none-any.whl - Mend

nkululeko 0.82.2__py3-none-any.whl → 0.82.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

nkululeko/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.82.2"
+VERSION="0.82.4"
 SAMPLING_RATE = 16000

nkululeko/data/dataset_csv.py CHANGED Viewed

@@ -22,7 +22,16 @@ class Dataset_CSV(Dataset):
         #     data_file = os.path.join(exp_root, data_file)
         root = os.path.dirname(data_file)
         audio_path = self.util.config_val_data(self.name, "audio_path", "")
-        df = audformat.utils.read_csv(data_file)
+        df = pd.read_csv(data_file)
+        # special treatment for segmented dataframes with only one column:
+        if "start" in df.columns and len(df.columns) == 4:
+            index = audformat.segmented_index(
+                df.file.values, df.start.values, df.end.values
+            )
+            df = df.set_index(index)
+            df = df.drop(columns=["file", "start", "end"])
+        else:
+            df = audformat.utils.read_csv(data_file)
         if isinstance(df, pd.Series):
             df = df.to_frame()
         rename_cols = self.util.config_val_data(self.name, "colnames", False)

nkululeko/feat_extract/feats_hubert.py CHANGED Viewed

@@ -6,23 +6,26 @@
 import os
-import audeer
-import nkululeko.glob_conf as glob_conf
 import pandas as pd
 import torch
 import torchaudio
-from audformat.utils import map_file_path
-from nkululeko.feat_extract.featureset import Featureset
 from tqdm import tqdm
-from transformers import HubertModel, Wav2Vec2FeatureExtractor
+from transformers import HubertModel
+from transformers import Wav2Vec2FeatureExtractor
+from nkululeko.feat_extract.featureset import Featureset
+import nkululeko.glob_conf as glob_conf
 class Hubert(Featureset):
-    """Class to extract HuBERT embedding)"""
+    """Class to extract HuBERT embedding)."""
     def __init__(self, name, data_df, feat_type):
-        """Constructor. is_train is needed to distinguish from test/dev sets,
-        because they use the codebook from the training"""
+        """Constructor.
+        Is_train is needed to distinguish from test/dev sets,
+        because they use the codebook from the training.
+        """
         super().__init__(name, data_df, feat_type)
         # check if device is not set, use cuda if available
         cuda = "cuda" if torch.cuda.is_available() else "cpu"
@@ -61,16 +64,12 @@ class Hubert(Featureset):
         """Extract the features or load them from disk if present."""
         store = self.util.get_path("store")
         storage = f"{store}{self.name}.pkl"
-        extract = self.util.config_val(
-            "FEATS", "needs_feature_extraction", False
-        )
+        extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or no_reuse or not os.path.isfile(storage):
             if not self.model_initialized:
                 self.init_model()
-            self.util.debug(
-                "extracting Hubert embeddings, this might take a while..."
-            )
+            self.util.debug("extracting Hubert embeddings, this might take a while...")
             emb_series = pd.Series(index=self.data_df.index, dtype=object)
             length = len(self.data_df.index)
             for idx, (file, start, end) in enumerate(
@@ -84,9 +83,7 @@ class Hubert(Featureset):
                 assert sampling_rate == 16000
                 emb = self.get_embeddings(signal, sampling_rate, file)
                 emb_series.iloc[idx] = emb
-            self.df = pd.DataFrame(
-                emb_series.values.tolist(), index=self.data_df.index
-            )
+            self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
             self.df.to_pickle(storage)
             try:
                 glob_conf.config["DATA"]["needs_feature_extraction"] = "false"

nkululeko/feat_extract/feats_wavlm.py CHANGED Viewed

@@ -4,27 +4,32 @@
 import os
-import nkululeko.glob_conf as glob_conf
 import pandas as pd
 import torch
 import torchaudio
-from nkululeko.feat_extract.featureset import Featureset
 from tqdm import tqdm
-from transformers import Wav2Vec2FeatureExtractor, WavLMModel
+from transformers import Wav2Vec2FeatureExtractor
+from transformers import WavLMModel
+from nkululeko.feat_extract.featureset import Featureset
+import nkululeko.glob_conf as glob_conf
 class Wavlm(Featureset):
-    """Class to extract WavLM embedding)"""
+    """Class to extract WavLM embedding)."""
+    def __init__(self, name, data_df, feats_type):
+        """Constructor.
-    def __init__(self, name, data_df, feat_type):
-        """Constructor. is_train is needed to distinguish from test/dev sets,
-        because they use the codebook from the training"""
-        super().__init__(name, data_df)
+        Is_train is needed to distinguish from test/dev sets,
+        because they use the codebook from the training.
+        """
+        super().__init__(name, data_df, feats_type)
         # check if device is not set, use cuda if available
         cuda = "cuda" if torch.cuda.is_available() else "cpu"
         self.device = self.util.config_val("MODEL", "device", cuda)
         self.model_initialized = False
-        self.feat_type = feat_type
+        self.feat_type = feats_type
     def init_model(self):
         # load model
@@ -59,7 +64,9 @@ class Wavlm(Featureset):
                     frame_offset=int(start.total_seconds() * 16000),
                     num_frames=int((end - start).total_seconds() * 16000),
                 )
-                assert sampling_rate == 16000, f"sampling rate should be 16000 but is {sampling_rate}"
+                assert (
+                    sampling_rate == 16000
+                ), f"sampling rate should be 16000 but is {sampling_rate}"
                 emb = self.get_embeddings(signal, sampling_rate, file)
                 emb_series.iloc[idx] = emb
             self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)

{nkululeko-0.82.2.dist-info → nkululeko-0.82.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.82.2
+Version: 0.82.4
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -333,6 +333,14 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
+Version 0.82.4
+--------------
+* fixed bug in wavlm
+Version 0.82.3
+--------------
+* fixed another audformat peculiarity to interprete time values as nanoseconds
 Version 0.82.2
 --------------
 * fixed audformat peculiarity that dataframes can have only one column

{nkululeko-0.82.2.dist-info → nkululeko-0.82.4.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
 nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
 nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
-nkululeko/constants.py,sha256=gPbZOExm1t31PbhB4n-QVZ_gqlY3LMOkTLs8QP0Uucg,39
+nkululeko/constants.py,sha256=kp-581CniawYq15MP-eOYf2iEPJCoCdhJ8W3kRUgPhs,39
 nkululeko/demo.py,sha256=55kNFA2helMhOxD4yZuKg1JWDtlUUpxm-6uAnroIydI,3264
 nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
 nkululeko/demo_predictor.py,sha256=-ggSHc3DXxRzjzcGB4qFBOMvKsfUdTkkde50BDrS9dA,4755
@@ -46,7 +46,7 @@ nkululeko/autopredict/ap_valence.py,sha256=n-hctRKySzhmJtowuMOTUu0T_ld3uK5pnfOzW
 nkululeko/autopredict/estimate_snr.py,sha256=S-bpS0xFkwWc4Ch75UrjbS8y538lQ0U3g_iLRFXureY,5048
 nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/data/dataset.py,sha256=JGzMD6HIvkFkYBekmbmslIKc5ADaCj06T-8gpqH_kFo,27650
-nkululeko/data/dataset_csv.py,sha256=AinRsdR_WTaZ9emhHXzDgjWvQJqBYzFnqD8IVHKB4TQ,3476
+nkululeko/data/dataset_csv.py,sha256=uLa7jW4w2ft299NkpXZMD361kPHF8oSYoIZ_ucxhuOM,3884
 nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
 nkululeko/feat_extract/feats_agender_agender.py,sha256=5dA7YA-YGxODovMC7ynMk3bnpPjfs0ApvSfjqvoSZY0,3346
@@ -54,7 +54,7 @@ nkululeko/feat_extract/feats_analyser.py,sha256=_5oz4y-NZCEBgfNP2GZ9WNqQR50Hbykm
 nkululeko/feat_extract/feats_auddim.py,sha256=VlzKKXTXa5kjLgQBWyEFy-daIyU1SkOwCCOIhKsWCvE,3162
 nkululeko/feat_extract/feats_audmodel.py,sha256=VjBNgAoxsHJhwr6Kwt9CxX6SaCM4RK_OV-GU2W5-bhU,3187
 nkululeko/feat_extract/feats_clap.py,sha256=nR6eEIRdsMHcfmD1bNtt5WfDvkxKjvEbukSSrXHm-HU,3489
-nkululeko/feat_extract/feats_hubert.py,sha256=ebj5PJtj-DcMudtnBWeY3_d_9pPFeEDEtP6NMDXIZNI,5289
+nkululeko/feat_extract/feats_hubert.py,sha256=cLoUzSLjSYBkQnftjacSL7ES3O7Ysh_KrPYvZtLX_TU,5196
 nkululeko/feat_extract/feats_import.py,sha256=rj1p8lz19tCAC8hLzzZAwZ0M6gzwH3BzfabFUgal0yw,1622
 nkululeko/feat_extract/feats_mld.py,sha256=Vvu7GZOkn7Vda8eIOXqHjg78zegkFe3vTUaCXyVM0eA,2021
 nkululeko/feat_extract/feats_mos.py,sha256=KXNt7QYEfxkvr6UyVhig2aWQBaIvovlrR4gPuP03gmo,4174
@@ -67,7 +67,7 @@ nkululeko/feat_extract/feats_spkrec.py,sha256=VK4ma3uWzM0YZStsgRTirfkbzjWIfRWSgs
 nkululeko/feat_extract/feats_squim.py,sha256=jToXiwRq5-MQheAP6xczvry1uVIHYUrD8bM7Wb1cnqM,4528
 nkululeko/feat_extract/feats_trill.py,sha256=HXQBaPWTX0iNEjBY7RD8uyFeYjDieHqv8ZilE0Jb-Pg,3319
 nkululeko/feat_extract/feats_wav2vec2.py,sha256=tFGe4t6MIVxTDQYR8geNCtZ_3ALc-gpi-rmQbF74HfI,5285
-nkululeko/feat_extract/feats_wavlm.py,sha256=8afzqZgHwDRrlHh4y5jnop4objURpXU_IrfiK6orsew,4604
+nkululeko/feat_extract/feats_wavlm.py,sha256=ulxpGjifUFx2ZgGmY32SmBJGIuvkYHoLb2n1LZ8KMwA,4703
 nkululeko/feat_extract/feats_whisper.py,sha256=0N7Vj65OVi2PNoB_NrDjWT5lP6xZNKxFOZZIoxkJvcA,4533
 nkululeko/feat_extract/featureset.py,sha256=HtgW2389rmlRAgFP3F1sSFzq2_iUVr2NhOfIXG9omt0,1448
 nkululeko/feat_extract/feinberg_praat.py,sha256=EP9pMALjlKdiYInLQdrZ7MmE499Mq-ISRCgqbqL3Rxc,21304
@@ -104,8 +104,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
 nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
 nkululeko/utils/util.py,sha256=_Z6OMJ3f-8TdETW9eqJYY5hwNRS5XCt9azzRnqoTTZE,12330
-nkululeko-0.82.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
-nkululeko-0.82.2.dist-info/METADATA,sha256=MasMoxHlcUmYgyaDu1CyNrmnh4vUVJVb0J6EqpV0ta0,35787
-nkululeko-0.82.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-nkululeko-0.82.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
-nkululeko-0.82.2.dist-info/RECORD,,
+nkululeko-0.82.4.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.82.4.dist-info/METADATA,sha256=iVA5K6coH1oLy918OgAZoq6wqFXcfYIcuZBV1dOnI2g,35950
+nkululeko-0.82.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+nkululeko-0.82.4.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.82.4.dist-info/RECORD,,

{nkululeko-0.82.2.dist-info → nkululeko-0.82.4.dist-info}/LICENSE RENAMED Viewed

File without changes

{nkululeko-0.82.2.dist-info → nkululeko-0.82.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{nkululeko-0.82.2.dist-info → nkululeko-0.82.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

nkululeko 0.82.2__py3-none-any.whl → 0.82.4__py3-none-any.whl

nkululeko 0.82.2__py3-none-any.whl → 0.82.4__py3-none-any.whl