nkululeko 0.92.2__py3-none-any.whl → 0.93.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/autopredict/ap_sid.py +37 -9
- nkululeko/constants.py +1 -1
- nkululeko/data/dataset_csv.py +4 -4
- nkululeko/experiment.py +52 -55
- nkululeko/explore.py +3 -5
- nkululeko/feat_extract/feats_wav2vec2.py +5 -4
- nkululeko/feat_extract/feats_wavlm.py +3 -2
- nkululeko/utils/files.py +26 -2
- nkululeko/utils/util.py +5 -1
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/METADATA +26 -18
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/RECORD +15 -15
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/WHEEL +1 -1
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/LICENSE +0 -0
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/entry_points.txt +0 -0
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/top_level.txt +0 -0
nkululeko/autopredict/ap_sid.py
CHANGED
@@ -6,8 +6,11 @@ import numpy as np
 from pyannote.audio import Pipeline
 import torch
 
+import audiofile
+
 from nkululeko.feature_extractor import FeatureExtractor
 import nkululeko.glob_conf as glob_conf
+from nkululeko.utils.files import concat_files
 from nkululeko.utils.util import Util
 
 
@@ -20,7 +23,7 @@ class SIDPredictor:
     def __init__(self, df):
         self.df = df
         self.util = Util("sidPredictor")
-        hf_token = self.util.config_val("
+        hf_token = self.util.config_val("MODEL", "hf_token", None)
         if hf_token is None:
             self.util.error(
                 "speaker id prediction needs huggingface token: [MODEL][hf_token]"
@@ -29,20 +32,45 @@ class SIDPredictor:
             "pyannote/speaker-diarization-3.1",
             use_auth_token=hf_token,
         )
-        device = self.util.config_val("
+        device = self.util.config_val("MODEL", "device", "cpu")
         self.pipeline.to(torch.device(device))
 
     def predict(self, split_selection):
         self.util.debug(f"estimating speaker id for {split_selection} samples")
         return_df = self.df.copy()
-        # @todo
         # 1) concat all audio files
+        tmp_file = "tmp.wav"
+        concat_files(return_df.index, tmp_file)
         # 2) get segmentations with pyannote
-
+        sname = "pyannotation"
+        if self.util.exist_pickle(sname):
+            annotation = self.util.from_pickle(sname)
+        else:
+            annotation = self.pipeline(tmp_file)
+            self.util.to_pickle(annotation, sname)
 
+        speakers, starts, ends = [], [], []
+        # print the result
+        for turn, _, speaker in annotation.itertracks(yield_label=True):
+            start = turn.start
+            end = turn.end
+            speakers.append(speaker)
+            starts.append(start)
+            ends.append(end)
+        # 3) map pyannote segments with orginal ones and assign speaker id
+        target_speakers = []
+        position = 0
+        for idx, (file, start, end) in enumerate(return_df.index.to_list()):
+            seg_start = start.total_seconds()
+            seg_end = end.total_seconds()
+            # file_duration = audiofile.duration(file)
+            seg_duration = seg_end - seg_start
+            offset = position + seg_start + seg_duration / 2
+            l = [i < offset for i in starts]
+            r = [i for i, x in enumerate(l) if x]
+            s_index = r.pop()
+            # self.util.debug(f"offset: {offset}, speaker = {speakers[s_index]}")
+            position += seg_duration
+            target_speakers.append(speakers[s_index])
+        return_df["speaker"] = target_speakers
         return return_df
-
-    def concat_files(self, df):
-        pass
-        # todo
-        # please use https://audeering.github.io/audiofile/usage.html#read-a-file
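Note on the new mapping step (3): each original segment is located by its midpoint on the concatenated timeline and gets the speaker of the last diarization turn that starts before that midpoint. A minimal standalone sketch of the same idea, with invented times as plain floats and independent of the nkululeko classes:

```python
# Sketch of the step-3 mapping above; turn data and segment durations are invented.
turn_starts = [0.0, 4.2, 9.7]    # start times of pyannote turns (seconds)
turn_speakers = ["A", "B", "A"]  # speaker label per turn

seg_durations = [3.0, 2.5, 5.0]  # durations of the original segments

position = 0.0  # running offset of the current segment in the concatenated file
for seg_duration in seg_durations:
    midpoint = position + seg_duration / 2
    # index of the last turn that starts before the segment midpoint
    s_index = max(i for i, s in enumerate(turn_starts) if s < midpoint)
    print(f"segment at {position:.1f}s -> speaker {turn_speakers[s_index]}")
    position += seg_duration
```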
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
-VERSION="0.92.2"
+VERSION="0.93.1"
 SAMPLING_RATE = 16000
nkululeko/data/dataset_csv.py
CHANGED
@@ -53,7 +53,7 @@ class Dataset_CSV(Dataset):
         if audformat.index_type(df.index) == "segmented":
             file_index = (
                 df.index.levels[0]
-                .map(lambda x: root
+                .map(lambda x: os.path.join(root, audio_path, x))
                 .values
             )
             df = df.set_index(df.index.set_levels(file_index, level="file"))
@@ -62,20 +62,20 @@
                 df = pd.DataFrame(df)
             df = df.set_index(
                 df.index.to_series().apply(
-                    lambda x: root
+                    lambda x: os.path.join(root, audio_path, x)
                 )
             )
         else: # absolute path is True
             if audformat.index_type(df.index) == "segmented":
                 file_index = (
-                    df.index.levels[0].map(lambda x: audio_path
+                    df.index.levels[0].map(lambda x: os.path.join(audio_path, x)).values
                 )
                 df = df.set_index(df.index.set_levels(file_index, level="file"))
             else:
                 if not isinstance(df, pd.DataFrame):
                     df = pd.DataFrame(df)
                 df = df.set_index(
-                    df.index.to_series().apply(lambda x: audio_path
+                    df.index.to_series().apply(lambda x: os.path.join(audio_path, x))
                 )
 
         self.df = df
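The old right-hand sides are truncated in this view, but the pattern of the change is clear: path prefixes are now built with `os.path.join` instead of string concatenation. A small illustration of why, with invented values for the variables used in dataset_csv.py:

```python
import os

# Hypothetical values, for illustration only.
root = "data/emodb"
audio_path = "wav"
x = "03a01Fa.wav"

# Manual concatenation needs explicit separators and hard-codes "/";
# os.path.join picks the separator for the platform.
print(root + "/" + audio_path + "/" + x)   # fragile
print(os.path.join(root, audio_path, x))   # portable
```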
nkululeko/experiment.py
CHANGED
@@ -197,6 +197,8 @@ class Experiment:
             )
             self.df_test = self._import_csv(storage_test)
             self.df_train = self._import_csv(storage_train)
+            self.train_empty = True if self.df_train.shape[0] == 0 else False
+            self.test_empty = True if self.df_test.shape[0] == 0 else False
         else:
             self.df_train, self.df_test = pd.DataFrame(), pd.DataFrame()
             for d in self.datasets.values():
@@ -212,6 +214,8 @@
                     self.util.debug(f"warn: {d.name} test empty")
                 self.df_test = pd.concat([self.df_test, d.df_test])
                 self.util.copy_flags(d, self.df_test)
+            self.train_empty = True if self.df_train.shape[0] == 0 else False
+            self.test_empty = True if self.df_test.shape[0] == 0 else False
             store = self.util.get_path("store")
             storage_test = f"{store}testdf.csv"
             storage_train = f"{store}traindf.csv"
@@ -253,50 +257,49 @@
         if self.util.exp_is_classification():
             datatype = self.util.config_val("DATA", "type", "dummy")
             if datatype == "continuous":
-                # if self.df_test.is_labeled:
-                #     # remember the target in case they get labelencoded later
-                #     self.df_test["class_label"] = self.df_test[self.target]
-                test_cats = self.df_test["class_label"].unique()
-                # else:
-                #     # if there is no target, copy a dummy label
-                #     self.df_test = self._add_random_target(self.df_test)
-                # if self.df_train.is_labeled:
-                #     # remember the target in case they get labelencoded later
-                #     self.df_train["class_label"] = self.df_train[self.target]
-                train_cats = self.df_train["class_label"].unique()
-
+                if not self.test_empty:
+                    test_cats = self.df_test["class_label"].unique()
+                if not self.train_empty:
+                    train_cats = self.df_train["class_label"].unique()
             else:
-                if self.df_test.is_labeled:
-                    test_cats = self.df_test[self.target].unique()
-                else:
-                    # if there is no target, copy a dummy label
-                    self.df_test = self._add_random_target(self.df_test).astype(
-                        "str"
-                    )
-                train_cats = self.df_train[self.target].unique()
-
-            if type(test_cats) == np.ndarray:
-                self.util.debug(f"Categories test (nd.array): {test_cats}")
-            else:
-                self.util.debug(f"Categories test (list): {list(test_cats)}")
-            if type(train_cats) == np.ndarray:
-                self.util.debug(f"Categories train (nd.array): {train_cats}")
-            else:
-                self.util.debug(f"Categories train (list): {list(train_cats)}")
-
+                if not self.test_empty:
+                    if self.df_test.is_labeled:
+                        test_cats = self.df_test[self.target].unique()
+                    else:
+                        # if there is no target, copy a dummy label
+                        self.df_test = self._add_random_target(self.df_test).astype(
+                            "str"
+                        )
+                if not self.train_empty:
+                    train_cats = self.df_train[self.target].unique()
             # encode the labels as numbers
             self.label_encoder = LabelEncoder()
-            self.df_train[self.target] = self.label_encoder.fit_transform(
-                self.df_train[self.target]
-            )
-            self.df_test[self.target] = self.label_encoder.transform(
-                self.df_test[self.target]
-            )
             glob_conf.set_label_encoder(self.label_encoder)
+            if not self.train_empty:
+                if type(train_cats) == np.ndarray:
+                    self.util.debug(f"Categories train (nd.array): {train_cats}")
+                else:
+                    self.util.debug(f"Categories train (list): {list(train_cats)}")
+
+                self.df_train[self.target] = self.label_encoder.fit_transform(
+                    self.df_train[self.target]
+                )
+            if not self.test_empty:
+                if self.df_test.is_labeled:
+                    if type(test_cats) == np.ndarray:
+                        self.util.debug(f"Categories test (nd.array): {test_cats}")
+                    else:
+                        self.util.debug(f"Categories test (list): {list(test_cats)}")
+                    if not self.train_empty:
+                        self.df_test[self.target] = self.label_encoder.transform(
+                            self.df_test[self.target]
+                        )
         if self.got_speaker:
+            speakers_train = 0 if self.train_empty else self.df_train.speaker.nunique()
+            speakers_test = 0 if self.test_empty else self.df_test.speaker.nunique()
             self.util.debug(
-                f"{
-                f" {
+                f"{speakers_test} speakers in test and"
+                f" {speakers_train} speakers in train"
             )
 
         target_factor = self.util.config_val("DATA", "target_divide_by", False)
@@ -363,14 +366,16 @@
             self.util.debug("no feature extractor specified.")
             self.feats_train, self.feats_test = pd.DataFrame(), pd.DataFrame()
             return
-        self.feature_extractor = FeatureExtractor(
-            df_train, feats_types, feats_name, "train"
-        )
-        self.feats_train = self.feature_extractor.extract()
-        self.feature_extractor = FeatureExtractor(
-            df_test, feats_types, feats_name, "test"
-        )
-        self.feats_test = self.feature_extractor.extract()
+        if not self.train_empty:
+            self.feature_extractor = FeatureExtractor(
+                df_train, feats_types, feats_name, "train"
+            )
+            self.feats_train = self.feature_extractor.extract()
+        if not self.test_empty:
+            self.feature_extractor = FeatureExtractor(
+                df_test, feats_types, feats_name, "test"
+            )
+            self.feats_test = self.feature_extractor.extract()
         self.util.debug(
             f"All features: train shape : {self.feats_train.shape}, test"
             f" shape:{self.feats_test.shape}"
@@ -393,12 +398,6 @@
             self.util.warn(f"new test labels shape: {self.df_test.shape[0]}")
 
         self._check_scale()
-        # store = self.util.get_path("store")
-        # store_format = self.util.config_val("FEATS", "store_format", "pkl")
-        # storage = f"{store}test_feats.{store_format}"
-        # self.util.write_store(self.feats_test, storage, store_format)
-        # storage = f"{store}train_feats.{store_format}"
-        # self.util.write_store(self.feats_train, storage, store_format)
 
     def augment(self):
         """Augment the selected samples."""
@@ -422,9 +421,7 @@
         return df_ret
 
     def autopredict(self):
-        """
-        Predict labels for samples with existing models and add to the dataframe.
-        """
+        """Predict labels for samples with existing models and add to the dataframe."""
         sample_selection = self.util.config_val("PREDICT", "split", "all")
         if sample_selection == "all":
             df = pd.concat([self.df_train, self.df_test])
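The recurring pattern in this release is the pair of `train_empty`/`test_empty` flags guarding every split-specific step, so that experiments with only a train or only a test set no longer crash. A minimal sketch of the guarded label encoding outside the `Experiment` class, with invented data:

```python
# Sketch of the empty-split guard pattern; df_train/df_test are stand-ins.
import pandas as pd
from sklearn.preprocessing import LabelEncoder

df_train = pd.DataFrame({"emotion": ["happy", "sad", "happy"]})
df_test = pd.DataFrame({"emotion": []})  # e.g. a train-only experiment

train_empty = df_train.shape[0] == 0
test_empty = df_test.shape[0] == 0

label_encoder = LabelEncoder()
if not train_empty:
    df_train["emotion"] = label_encoder.fit_transform(df_train["emotion"])
if not test_empty and not train_empty:
    # transform() without a prior fit() would fail, hence both guards
    df_test["emotion"] = label_encoder.transform(df_test["emotion"])
```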
nkululeko/explore.py
CHANGED
@@ -1,5 +1,4 @@
-"""
-Explore the feature sets of a machine learning experiment.
+"""Explore the feature sets of a machine learning experiment.
 
 This script is the entry point for the 'explore' module of the nkululeko framework.
 It handles loading the experiment configuration, setting up the experiment, and
@@ -77,7 +76,6 @@ def main():
     plot_feats = eval(util.config_val("EXPL", "feature_distributions", "False"))
     tsne = eval(util.config_val("EXPL", "tsne", "False"))
     scatter = eval(util.config_val("EXPL", "scatter", "False"))
-    spotlight = eval(util.config_val("EXPL", "spotlight", "False"))
     shap = eval(util.config_val("EXPL", "shap", "False"))
     model_type = util.config_val("EXPL", "model", False)
     plot_tree = eval(util.config_val("EXPL", "plot_tree", "False"))
@@ -87,8 +85,8 @@
         expr.extract_feats()
         needs_feats = True
     # explore
-    expr.init_runmanager()
-    expr.runmgr.do_runs()
+    # expr.init_runmanager()
+    # expr.runmgr.do_runs()
     expr.analyse_features(needs_feats)
     expr.store_report()
     print("DONE")
nkululeko/feat_extract/feats_wav2vec2.py
CHANGED
@@ -1,4 +1,4 @@
-"""
+"""feats_wav2vec2.py.
 feat_types example = [wav2vec2-large-robust-ft-swbd-300h,
 wav2vec2-xls-r-2b, wav2vec2-large, wav2vec2-large-xlsr-53, wav2vec2-base]
 
@@ -11,12 +11,13 @@ import os
 import pandas as pd
 import torch
 import torchaudio
-import transformers
 from tqdm import tqdm
-
+import transformers
+from transformers import Wav2Vec2FeatureExtractor
+from transformers import Wav2Vec2Model
 
-import nkululeko.glob_conf as glob_conf
 from nkululeko.feat_extract.featureset import Featureset
+import nkululeko.glob_conf as glob_conf
 
 
 class Wav2vec2(Featureset):
nkululeko/feat_extract/feats_wavlm.py
CHANGED
@@ -8,10 +8,11 @@ import pandas as pd
 import torch
 import torchaudio
 from tqdm import tqdm
-from transformers import Wav2Vec2FeatureExtractor, WavLMModel
+from transformers import Wav2Vec2FeatureExtractor
+from transformers import WavLMModel
 
-import nkululeko.glob_conf as glob_conf
 from nkululeko.feat_extract.featureset import Featureset
+import nkululeko.glob_conf as glob_conf
 
 
 class Wavlm(Featureset):
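The import reshuffling above accompanies embedding extraction with pretrained wav2vec 2.0 and WavLM models. For orientation, a hedged sketch of how such utterance embeddings are typically obtained with these transformers classes; the model name and mean pooling are illustrative assumptions, not nkululeko's exact configuration:

```python
# Illustrative only: mean-pooled wav2vec 2.0 embeddings for one dummy signal.
import numpy as np
import torch
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model

model_name = "facebook/wav2vec2-base"  # assumed model, for illustration
processor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
model = Wav2Vec2Model.from_pretrained(model_name)
model.eval()

signal = np.zeros(16000, dtype=np.float32)  # 1 s of silence at 16 kHz
inputs = processor(signal, sampling_rate=16000, return_tensors="pt")
with torch.no_grad():
    hidden = model(inputs.input_values).last_hidden_state  # (1, frames, 768)
embedding = hidden.mean(dim=1).squeeze()  # one vector per utterance
print(embedding.shape)  # torch.Size([768])
```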
nkululeko/utils/files.py
CHANGED
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
-# copied from librosa.util.files.py
+# find_files copied from librosa.util.files.py
 
 """Utility functions for dealing with files"""
 from __future__ import annotations
@@ -8,11 +8,22 @@ from __future__ import annotations
 import glob
 import os
 from pathlib import Path
-from typing import Any, List, Optional, Set, Union
+from typing import Any
+from typing import List
+from typing import Optional
+from typing import Set
+from typing import Union
+
+import numpy as np
+from tqdm import tqdm
+
+import audiofile
+
 
 # add new function here
 __all__ = [
     "find_files",
+    "concat_files",
 ]
 
 
@@ -143,3 +154,16 @@ def __get_files(dir_name: Union[str, os.PathLike[Any]], extensions: Set[str]):
         myfiles |= set(glob.glob(globstr))
 
     return myfiles
+
+
+def concat_files(index, outfile_path):
+    buffer = np.asarray([])
+    sr = 16000
+    for idx, (file, start, end) in enumerate(tqdm(index.to_list())):
+        signal, sr = audiofile.read(
+            file,
+            offset=start.total_seconds(),
+            duration=(end - start).total_seconds(),
+        )
+        buffer = np.concatenate([buffer, signal])
+    audiofile.write(outfile_path, buffer, sr)
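The new `concat_files` helper expects an index that yields `(file, start, end)` tuples with `pandas.Timedelta` start and end values, as in a segmented audformat index, and writes all segments back to back. A hypothetical usage sketch; the file names are invented, and note that the sampling rate of the last file read is used for writing:

```python
# Hypothetical usage of concat_files with a segmented (file, start, end) index.
import pandas as pd
from nkululeko.utils.files import concat_files

index = pd.MultiIndex.from_tuples(
    [
        ("wav/a.wav", pd.Timedelta("0s"), pd.Timedelta("2.5s")),
        ("wav/b.wav", pd.Timedelta("1s"), pd.Timedelta("3s")),
    ],
    names=["file", "start", "end"],
)
concat_files(index, "tmp.wav")  # 2.5 s of a.wav followed by 2 s of b.wav
```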
nkululeko/utils/util.py
CHANGED
@@ -230,7 +230,11 @@ class Util:
 
     def get_model_description(self):
         mt = ""
-        mt = f'{self.config["MODEL"]["type"]}'
+        try:
+            mt = f'{self.config["MODEL"]["type"]}'
+        except KeyError:
+            # no model type given
+            pass
         # ft = "_".join(ast.literal_eval(self.config["FEATS"]["type"]))
         ft_value = self.config["FEATS"]["type"]
         if (
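The change wraps the optional `[MODEL] type` lookup in try/except, since `configparser` raises `KeyError` for a missing section or option. A tiny sketch of the pattern, with invented config text:

```python
# configparser raises KeyError for missing sections/options, so optional
# settings are read inside try/except; the config content here is invented.
import configparser

config = configparser.ConfigParser()
config.read_string("[FEATS]\ntype = ['os']\n")  # no [MODEL] section

mt = ""
try:
    mt = f'{config["MODEL"]["type"]}'
except KeyError:
    pass  # no model type given
print(repr(mt))  # ''
```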
{nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.92.2
+Version: 0.93.1
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -100,22 +100,22 @@ And can show the distribution of specific features per category:
 <img src="meta/images/feat_dist.png" width="500px"/>
 
 ### t-SNE plots
-A t-SNE plot can give you an estimate
+A t-SNE plot can give you an estimate of whether your acoustic features are useful at all:
 
 <img src="meta/images/tsne.png" width="500px"/>
 
 ### Data distribution
-Sometimes you only want to take a look at your data:
+Sometimes, you only want to take a look at your data:
 
 <img src="meta/images/data_plot.png" width="500px"/>
 
 ### Bias checking
-In cases you might wonder if there's bias in your data. You can try to detect this with automatically estimated speech properties
+In some cases, you might wonder if there's bias in your data. You can try to detect this with automatically estimated speech properties by visualizing the correlation of target labels and predicted labels.
 
 <img src="meta/images/emotion-pesq.png" width="500px"/>
 
 ### Uncertainty
-Nkululeko estimates uncertainty of model
+Nkululeko estimates the uncertainty of model decisions (only for classifiers) with entropy over the class probabilities or logits per sample.
 
 <img src="meta/images/uncertainty.png" width="500px"/>
 
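The reworded README passage describes per-sample uncertainty as entropy over the class probabilities; a small sketch of that computation (nkululeko's own implementation may differ, e.g. in normalization or in using logits):

```python
# Entropy as an uncertainty measure over class probabilities (sketch).
import numpy as np

def entropy(probs):
    probs = np.clip(probs, 1e-12, 1.0)  # avoid log(0)
    return -np.sum(probs * np.log2(probs))

print(entropy(np.array([0.98, 0.01, 0.01])))     # confident -> low entropy
print(entropy(np.array([1 / 3, 1 / 3, 1 / 3])))  # uniform -> maximal entropy
```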
@@ -138,7 +138,7 @@ appears, please try
 ```
 pip install x
 ```
-For many packages you will need the missing torch package.
+For many packages, you will need the missing torch package.
 If you don't have a GPU (which is probably true if you don't know what that is), please use
 ```
 pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
@@ -219,7 +219,7 @@ All of them take *--config <my_config.ini>* as an argument.
 * **nkululeko.predict**: [predict features](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/) like SNR, MOS, arousal/valence, age/gender, with DNN models
 * **nkululeko.segment**: [segment a database](http://blog.syntheticspeech.de/2023/07/14/nkululeko-segmenting-a-database/) based on VAD (voice activity detection)
 * **nkululeko.resample**: check on all [sampling rates and change](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/) to 16kHz
-* **nkululeko.nkuluflag**: a convenient module to specify configuration parameters on the command
+* **nkululeko.nkuluflag**: a convenient module to specify configuration parameters on the command line. Usage:
 
 ```bash
 $ python -m nkululeko.nkuluflag.py [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET] [--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
@@ -236,7 +236,7 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
 * [Combine feature sets](http://blog.syntheticspeech.de/2022/06/30/how-to-combine-feature-sets-with-nkululeko/)
 * [Classifying continuous variables](http://blog.syntheticspeech.de/2022/01/26/nkululeko-classifying-continuous-variables/)
 * [Try out / demo a trained model](http://blog.syntheticspeech.de/2022/01/24/nkululeko-try-out-demo-a-trained-model/)
-* [Perform cross
+* [Perform cross-database experiments](http://blog.syntheticspeech.de/2021/10/05/nkululeko-perform-cross-database-experiments/)
 * [Meta parameter optimization](http://blog.syntheticspeech.de/2021/09/03/perform-optimization-with-nkululeko/)
 * [How to set up wav2vec embedding](http://blog.syntheticspeech.de/2021/12/03/how-to-set-up-wav2vec-embedding-for-nkululeko/)
 * [How to soft-label a database](http://blog.syntheticspeech.de/2022/01/24/how-to-soft-label-a-database-with-nkululeko/)
@@ -261,7 +261,7 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
 * [Predict new labels for your data from public models and check bias](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/)
 * [Resample](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/)
 * [Get some statistics on correlation and effect-size](http://blog.syntheticspeech.de/2023/09/05/nkululeko-get-some-statistics-on-correlation-and-effect-size/)
-* [Automatic generation of a latex
+* [Automatic generation of a latex/pdf report](http://blog.syntheticspeech.de/2023/09/26/nkululeko-generate-a-latex-pdf-report/)
 * [Inspect your data with Spotlight](http://blog.syntheticspeech.de/2023/10/31/nkululeko-inspect-your-data-with-spotlight/)
 * [Automatically stratify your split sets](http://blog.syntheticspeech.de/2023/11/07/nkululeko-automatically-stratify-your-split-sets/)
 * [re-name data column names](http://blog.syntheticspeech.de/2023/11/16/nkululeko-re-name-data-column-names/)
@@ -277,7 +277,7 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
 * NEW: [Here's a Google colab that runs this example out-of-the-box](https://colab.research.google.com/drive/1Up7t5Nn7VwDPCCEpTg2U7cpZ_PdoEgj-?usp=sharing), and here is the same [with Kaggle](https://www.kaggle.com/felixburk/nkululeko-hello-world-example)
 * [I made a video to show you how to do this on Windows](https://www.youtube.com/playlist?list=PLRceVavtxLg0y2jiLmpnUfiMtfvkK912D)
 * Set up Python on your computer, version >= 3.8
-* Open a terminal/
+* Open a terminal/command line/console window
 * Test python by typing ```python```, python should start with version >3 (NOT 2!). You can leave the Python Interpreter by typing *exit()*
 * Create a folder on your computer for this example, let's call it `nkulu_work`
 * Get a copy of the [Berlin emodb in audformat](https://zenodo.org/records/7447302/files/emodb.zip?download=1) and unpack inside the folder you just created (`nkulu_work`)
@@ -293,7 +293,7 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
 * if that worked, you should see a ```(venv)``` in front of your prompt
 * Install the required packages in your environment
 * ```pip install nkululeko```
-* Repeat until all error messages
+* Repeat until all error messages vanish (or fix them, or try to ignore them)...
 * Now you should have two folders in your *nkulu_work* folder:
 * *emodb* and *venv*
 * Download a copy of the file [exp_emodb.ini](meta/demos/exp_emodb.ini) to the current working directory (```nkulu_work```)
@@ -301,9 +301,9 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
 * ```python -m nkululeko.nkululeko --config exp_emodb.ini```
 * Find the results in the newly created folder exp_emodb
 * Inspect ```exp_emodb/images/run_0/emodb_xgb_os_0_000_cnf.png```
-* This is the main result of
+* This is the main result of your experiment: a confusion matrix for the emodb emotional categories
 * Inspect and play around with the [demo configuration file](meta/demos/exp_emodb.ini) that defined your experiment, then re-run.
-* There are many ways to experiment with different classifiers and acoustic
+* There are many ways to experiment with different classifiers and acoustic feature sets, [all described here](https://github.com/felixbur/nkululeko/blob/main/ini_file.md)
 
 ### Features
 The framework is targeted at the speech domain and supports experiments where different classifiers are combined with different feature extractors.
@@ -327,16 +327,16 @@ Here's [an animation that shows the progress of classification done with nkulule
 
 
 ## License
-Nkululeko can be used under the [MIT license](https://choosealicense.com/licenses/mit/)
+Nkululeko can be used under the [MIT license](https://choosealicense.com/licenses/mit/).
 
 
 ## Contributing
-Contributions are welcome and encouraged. To learn more about how to contribute to nkululeko please refer to the [Contributing guidelines](./CONTRIBUTING.md)
+Contributions are welcome and encouraged. To learn more about how to contribute to nkululeko, please refer to the [Contributing guidelines](./CONTRIBUTING.md).
 
 ## Citing
-If you use it, please mention the Nkululeko paper
+If you use it, please mention the Nkululeko paper:
 
-F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schuller: Nkululeko: A Tool For Rapid Speaker Characteristics Detection, Proc. Proc. LREC, 2022
+> F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schuller: Nkululeko: A Tool For Rapid Speaker Characteristics Detection, Proc. Proc. LREC, 2022
 
 
 ```
@@ -355,6 +355,14 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
 
+Version 0.93.1
+--------------
+* made explore module more robust
+
+Version 0.93.0
+--------------
+* integrated pyannote for speaker prediction for predict module
+
 Version 0.92.2
 --------------
 * added some output to automatic speaker id
@@ -365,7 +373,7 @@ Version 0.92.1
 
 Version 0.92.0
 --------------
-* added first version of automatic speaker prediction
+* added first version of automatic speaker prediction for segment module
 
 Version 0.91.3
 --------------
{nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/RECORD
CHANGED
@@ -2,14 +2,14 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=FoMbBrfyOZd4QAw7oIHl3X6-UpsqAKWVDIolCA7qOWs,3196
 nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
 nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
-nkululeko/constants.py,sha256
+nkululeko/constants.py,sha256=-K1r-fO1ilOQ-FT2-YDWo37lCfB7mjYPEylDVqqcP_s,39
 nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
 nkululeko/demo.py,sha256=4Yzhg6pCPBYPGJrP7JX2TysVosl_R1llpVDKc2P_gUA,4955
 nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
 nkululeko/demo_predictor.py,sha256=lDF-xOxRdEAclOmbepAYg-BQXQdGkHfq2n74PTIoop8,4872
 nkululeko/ensemble.py,sha256=71V-rre61H3J4sh7lu-OTo4I2_g7mm_rQxwW1ARDHgY,12782
-nkululeko/experiment.py,sha256=
-nkululeko/explore.py,sha256=
+nkululeko/experiment.py,sha256=G5yNFO3z8yGAXJYzZbA-ANAPU9tTtijVyOGG7NGsn2M,31701
+nkululeko/explore.py,sha256=FPM2CS-LKgcDV-LnjYlD6pEv7HuCQpH_C3KyyiOCdk4,3589
 nkululeko/export.py,sha256=U-V4acxtuL6qKt6oAsVcM5TTeWogYUJ3GU-lA6rq6d4,4336
 nkululeko/feature_extractor.py,sha256=UnspIWz3XrNhKnBBhWZkH2bHvD-sROtrQVqB1JvkUyw,4088
 nkululeko/file_checker.py,sha256=xJY0Q6w47pnmgJVK5rcAKPYBrCpV7eBT4_3YBzTx-H8,3454
@@ -43,14 +43,14 @@ nkululeko/autopredict/ap_gender.py,sha256=b6oTqHKVwOnYh4YlKbuMflssS4HJqs_c1ayusa
 nkululeko/autopredict/ap_mos.py,sha256=e4hmgb0Yf1_AbC5P0CqXJIvufjhbTrqmI5goARxrY0Y,1107
 nkululeko/autopredict/ap_pesq.py,sha256=mRt3Loucaoy4vJxwfuxUt0fP88bMGvkmrLCEpKEXWp0,1140
 nkululeko/autopredict/ap_sdr.py,sha256=VQ2UkxOO3ipqYNNjFwKgEaGCk8IzLI5lX_2tZFLIvTY,1188
-nkululeko/autopredict/ap_sid.py,sha256=
+nkululeko/autopredict/ap_sid.py,sha256=mCxf2DUOPUlDdnVwCeljFJtCXM4uum1poZQ9RrwHHM8,2641
 nkululeko/autopredict/ap_snr.py,sha256=AiTU8-7CMEeowmYkMO19lw1HCb1yTXC6KeulNf8gOqw,1110
 nkululeko/autopredict/ap_stoi.py,sha256=UEQg1ZV0meAsxgdWB8ieRs9GPXHqArmsaOyCGRwpcnA,1187
 nkululeko/autopredict/ap_valence.py,sha256=WrW4Ltqi_odW49_4QEVKkfnrcztLIVZ4cXIEHu4dBN8,1026
 nkululeko/autopredict/estimate_snr.py,sha256=1k9-XadABudnsNOeFZD_Fg0E64-GUQVS7JEp82MLQS4,4995
 nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/data/dataset.py,sha256=Hz2IOsdcESG-P3aP7r4d1xj_gIP6fyGCYOwukoQ7SM8,29321
-nkululeko/data/dataset_csv.py,sha256=
+nkululeko/data/dataset_csv.py,sha256=p2b4eS5R2Q5zdOIc56NRRU2PTFXSRt0qrdHGafHkWKo,4830
 nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/feat_extract/feats_agender.py,sha256=onfAQ6-xx_mFMJXEF1IX8cHBmGtGeX6weJmxbkfh1_o,3184
 nkululeko/feat_extract/feats_agender_agender.py,sha256=_YQv1qw--3uQfnyTQDCwlmPRnrhdMhgXbYK2yQtseW0,3464
@@ -71,8 +71,8 @@ nkululeko/feat_extract/feats_spectra.py,sha256=6WhFUpB0WTutg7OFMlAw9lSwVU5OBYCDc
 nkululeko/feat_extract/feats_spkrec.py,sha256=o_6bdU4lIkj64S5Kdjf1iyuo1VASeYxE4XdxV94a8gE,4732
 nkululeko/feat_extract/feats_squim.py,sha256=yJifsp9kj9iJjW_UAKr3LlvVhX5rv7el4bepn0wN2a8,4578
 nkululeko/feat_extract/feats_trill.py,sha256=TUCrh5xbfnHD2gzb9mlkMSV4aK6YXazMqsh5xJ5yzUI,3188
-nkululeko/feat_extract/feats_wav2vec2.py,sha256=
-nkululeko/feat_extract/feats_wavlm.py,sha256=
+nkululeko/feat_extract/feats_wav2vec2.py,sha256=WYB9XlRzgDi8cGSKzhV5jahA0GZ_SiWgaQ25IcEemto,5296
+nkululeko/feat_extract/feats_wavlm.py,sha256=O9cfc39VF5aPJRRATKb37pHT4W11i2cu5O1mY9LOjIA,4755
 nkululeko/feat_extract/feats_whisper.py,sha256=n3ESZtva7wshs8E8diBlQYa9xCH_P0UY1DncSrxz-FY,4508
 nkululeko/feat_extract/featureset.py,sha256=clcBv9rzBRW-bfw7JC_FYTjU5uUS-c0UE1XtQLYYRiE,1615
 nkululeko/feat_extract/feinberg_praat.py,sha256=bgzWtQkKbgcygrzwAxDXosui1rcc38qhWuJq9GLr0z8,21308
@@ -109,12 +109,12 @@ nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=b3t0zdpJYofKWMyKRMtMMX91xe
 nkululeko/segmenting/seg_pyannote.py,sha256=6IPbgjnGOz9juzEKDTZN3PSipX4t6Mz-DILAx3rp5do,4216
 nkululeko/segmenting/seg_silero.py,sha256=ulodnvtRq5MLHDxy_RmAK4tJg6h1d-mPq-uCPFkGVKg,4258
 nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nkululeko/utils/files.py,sha256=
+nkululeko/utils/files.py,sha256=SrrYaU7AB80MZHiV1jcB0h_zigvYLYgSVNTXV4ao38g,4593
 nkululeko/utils/stats.py,sha256=vCRzhCR0Gx5SiJyAGbj1TIto8ocGz58CM5Pr3LltagA,2948
-nkululeko/utils/util.py,sha256=
-nkululeko-0.
-nkululeko-0.
-nkululeko-0.
-nkululeko-0.
-nkululeko-0.
-nkululeko-0.
+nkululeko/utils/util.py,sha256=yxETonpbcGTeJhvdDr7sC4CO0Qtf-pgHEclZ76eOtPA,16816
+nkululeko-0.93.1.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.93.1.dist-info/METADATA,sha256=eLAEogu2sQxBrmzlxfHKs8AP2d9uELQLGhmIJ5sBWgk,42018
+nkululeko-0.93.1.dist-info/WHEEL,sha256=a7TGlA-5DaHMRrarXjVbQagU3Man_dCnGIWMJr5kRWo,91
+nkululeko-0.93.1.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
+nkululeko-0.93.1.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.93.1.dist-info/RECORD,,
{nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/LICENSE
File without changes
{nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/entry_points.txt
File without changes
{nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/top_level.txt
File without changes