PyPI - nkululeko - Versions diffs - 0.92.0__py3-none-any.whl → 0.92.2__py3-none-any.whl - Mend

nkululeko 0.92.0__py3-none-any.whl → 0.92.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

nkululeko/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.92.0"
+VERSION="0.92.2"
 SAMPLING_RATE = 16000

nkululeko/plots.py CHANGED Viewed

@@ -4,14 +4,14 @@ import ast
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-import seaborn as sns
 from scipy import stats
+import seaborn as sns
 from sklearn.manifold import TSNE
 import nkululeko.glob_conf as glob_conf
-import nkululeko.utils.stats as su
 from nkululeko.reporting.defines import Header
 from nkululeko.reporting.report_item import ReportItem
+import nkululeko.utils.stats as su
 from nkululeko.utils.util import Util
@@ -30,8 +30,6 @@ class Plots:
             df_speaker["samplenum"] = df_speaker.shape[0]
             df_speakers = pd.concat([df_speakers, df_speaker.head(1)])
         # plot the distribution of samples per speaker
-        # one up because of the runs
-        fig_dir = self.util.get_path("fig_dir") + "../"
         self.util.debug("plotting samples per speaker")
         if "gender" in df_speakers:
             filename = "samples_value_counts"
@@ -319,6 +317,31 @@ class Plots:
         img_path = f"{fig_dir}{filename}_{sample_selection}.{self.format}"
         plt.savefig(img_path)
         plt.close(fig)
+        self.util.debug(f"plotted durations to {img_path}")
+        glob_conf.report.add_item(
+            ReportItem(
+                Header.HEADER_EXPLORE,
+                caption,
+                title,
+                img_path,
+            )
+        )
+    def plot_speakers(self, df, sample_selection):
+        filename = "speakers"
+        caption = "speakers"
+        # one up because of the runs
+        fig_dir = self.util.get_path("fig_dir") + "../"
+        sns.set_style("whitegrid")  # Set style for chart
+        ax = df["speaker"].value_counts().plot(kind="pie", autopct="%1.1f%%")
+        title = f"Speaker distr. for {sample_selection} {df.shape[0]}."
+        ax.set_title(title)
+        fig = ax.figure
+        # plt.tight_layout()
+        img_path = f"{fig_dir}{filename}_{sample_selection}.{self.format}"
+        plt.savefig(img_path)
+        plt.close(fig)
+        self.util.debug(f"plotted speakers to {img_path}")
         glob_conf.report.add_item(
             ReportItem(
                 Header.HEADER_EXPLORE,

nkululeko/segment.py CHANGED Viewed

@@ -23,6 +23,8 @@ import os
 import pandas as pd
+import audformat
 from nkululeko.constants import VERSION
 from nkululeko.experiment import Experiment
 import nkululeko.glob_conf as glob_conf
@@ -63,7 +65,7 @@ def main():
     # segment
     segmented_file = util.config_val("SEGMENT", "result", "segmented.csv")
-    segmenter = util.config_val("SEGMENT", "method", "silero")
+    method = util.config_val("SEGMENT", "method", "silero")
     sample_selection = util.config_val("SEGMENT", "sample_selection", "all")
     if sample_selection == "all":
         df = pd.concat([expr.df_train, expr.df_test])
@@ -76,19 +78,33 @@ def main():
             f"unknown segmentation selection specifier {sample_selection},"
             " should be [all | train | test]"
         )
-    util.debug(f"segmenting {sample_selection}: {df.shape[0]} samples with {segmenter}")
-    if segmenter == "silero":
-        from nkululeko.segmenting.seg_silero import Silero_segmenter
-        segmenter = Silero_segmenter()
-        df_seg = segmenter.segment_dataframe(df)
-    elif segmenter == "pyannote":
-        from nkululeko.segmenting.seg_pyannote import Pyannote_segmenter
-        segmenter = Pyannote_segmenter(config)
-        df_seg = segmenter.segment_dataframe(df)
+    result_file = f"{expr.data_dir}/{segmented_file}"
+    if os.path.exists(result_file):
+        util.debug(f"reusing existing result file: {result_file}")
+        df_seg = audformat.utils.read_csv(result_file)
     else:
-        util.error(f"unknown segmenter: {segmenter}")
+        util.debug(
+            f"segmenting {sample_selection}: {df.shape[0]} samples with {method}"
+        )
+        if method == "silero":
+            from nkululeko.segmenting.seg_silero import Silero_segmenter
+            segmenter = Silero_segmenter()
+            df_seg = segmenter.segment_dataframe(df)
+        elif method == "pyannote":
+            from nkululeko.segmenting.seg_pyannote import Pyannote_segmenter
+            segmenter = Pyannote_segmenter(config)
+            df_seg = segmenter.segment_dataframe(df)
+        else:
+            util.error(f"unknown segmenter: {method}")
+        # remove encoded labels
+        target = util.config_val("DATA", "target", None)
+        if "class_label" in df_seg.columns:
+            df_seg = df_seg.drop(columns=[target])
+            df_seg = df_seg.rename(columns={"class_label": target})
+        # save file
+        df_seg.to_csv(f"{expr.data_dir}/{segmented_file}")
     def calc_dur(x):
         starts = x[1]
@@ -100,6 +116,11 @@ def main():
     df_seg["duration"] = df_seg.index.to_series().map(lambda x: calc_dur(x))
     num_before = df.shape[0]
     num_after = df_seg.shape[0]
+    util.debug(
+        f"saved {segmented_file} to {expr.data_dir}, {num_after} samples (was"
+        f" {num_before})"
+    )
     # plot distributions
     from nkululeko.plots import Plots
@@ -110,18 +131,10 @@ def main():
     plots.plot_durations(
         df_seg, "segmented_durations", sample_selection, caption="Segmented durations"
     )
-    print("")
-    # remove encoded labels
-    target = util.config_val("DATA", "target", None)
-    if "class_label" in df_seg.columns:
-        df_seg = df_seg.drop(columns=[target])
-        df_seg = df_seg.rename(columns={"class_label": target})
-    # save file
-    df_seg.to_csv(f"{expr.data_dir}/{segmented_file}")
-    util.debug(
-        f"saved {segmented_file} to {expr.data_dir}, {num_after} samples (was"
-        f" {num_before})"
-    )
+    if method == "pyannote":
+        util.debug(df_seg[["speaker", "duration"]].groupby(["speaker"]).sum())
+        plots.plot_speakers(df_seg, sample_selection)
     glob_conf.report.add_item(
         ReportItem(
             "Data",

{nkululeko-0.92.0.dist-info → nkululeko-0.92.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.92.0
+Version: 0.92.2
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -355,6 +355,14 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
+Version 0.92.2
+--------------
+* added some output to automatic speaker id
+Version 0.92.1
+--------------
+* added a speaker plot to pyannote results
 Version 0.92.0
 --------------
 * added first version of automatic speaker prediction/segmentation

{nkululeko-0.92.0.dist-info → nkululeko-0.92.2.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=FoMbBrfyOZd4QAw7oIHl3X6-UpsqAKWVDIolCA7qOWs,3196
 nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
 nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
-nkululeko/constants.py,sha256=trIGnE99KWCznIwZEph-SDuz9A8bzck2v0Md4VgZzMY,39
+nkululeko/constants.py,sha256=HBBuhT6kpIHhRMiSBkU07cszGO7kO2A8qTYrN6zH9rw,39
 nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
 nkululeko/demo.py,sha256=4Yzhg6pCPBYPGJrP7JX2TysVosl_R1llpVDKc2P_gUA,4955
 nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
@@ -20,12 +20,12 @@ nkululeko/modelrunner.py,sha256=lJy-xM4QfDDWeL0dLTE_VIb4sYrnd_Z_yJRK3wwohQA,1119
 nkululeko/multidb.py,sha256=sO6OwJn8sn1-C-ig3thsIL8QMWHdV9SnJhDodKjeKrI,6876
 nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
 nkululeko/nkululeko.py,sha256=M7baIq2nAoi6dEoBL4ATEuqAs5U1fvl_hyqAl5DybAQ,2040
-nkululeko/plots.py,sha256=sR061gOsyvuh8UBYS52FINSal4CYNQgvq3B4WOSimDw,23092
+nkululeko/plots.py,sha256=dK3jVwsZufqXgHwAvDYt6uDg_KYk5cfxlP1Fo8kb9HA,23935
 nkululeko/predict.py,sha256=MLnHEyFmSiHLLs-HDczag8Vu3zKF5T1rXLKdZZJ6py8,2083
 nkululeko/resample.py,sha256=akSAjJ3qn-O5NAyLJHVHdsK7MUZPGaZUvM2TwMSmj2M,5194
 nkululeko/runmanager.py,sha256=AswmORVUkCIH0gTx6zEyufvFATQBS8C5TXo2erSNdVg,7611
 nkululeko/scaler.py,sha256=7VOZ4sREMoQtahfETt9RyuR29Fb7PCwxlYVjBbdCVFc,4101
-nkululeko/segment.py,sha256=CEKfvKrvq-XbciluOkgGLLe7DQO9PLSFGw8rMsFpDVQ,4476
+nkululeko/segment.py,sha256=DRjC6b7SeInYgwBcDPXpTXPvXPS-J8kFQO7H095bK80,4945
 nkululeko/syllable_nuclei.py,sha256=5w_naKxNxz66a_qLkraemi2fggM-gWesiiBPS47iFcE,9931
 nkululeko/test.py,sha256=1w624vo5KTzmFC8BUStGlLDmIEAFuJUz7J0W-gp7AxI,1677
 nkululeko/test_predictor.py,sha256=DEHE_D3A6m6KJTrpDKceA1n655t_UZV3WQd57K4a3Ho,2863
@@ -112,9 +112,9 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
 nkululeko/utils/stats.py,sha256=vCRzhCR0Gx5SiJyAGbj1TIto8ocGz58CM5Pr3LltagA,2948
 nkululeko/utils/util.py,sha256=XFZdhCc_LM4EmoZ5tKKaBCQLXclcNmvHwhfT_CXB98c,16723
-nkululeko-0.92.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
-nkululeko-0.92.0.dist-info/METADATA,sha256=-So3jBO4lGif0bmb4KgDxFV4p-EyR7u1eejB8mEhotA,41682
-nkululeko-0.92.0.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
-nkululeko-0.92.0.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
-nkululeko-0.92.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
-nkululeko-0.92.0.dist-info/RECORD,,
+nkululeko-0.92.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.92.2.dist-info/METADATA,sha256=pwdxFGECc-W2WdmnXxgJz6Jy3CbvwzeHASfu7WxFK7g,41832
+nkululeko-0.92.2.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+nkululeko-0.92.2.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
+nkululeko-0.92.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.92.2.dist-info/RECORD,,

{nkululeko-0.92.0.dist-info → nkululeko-0.92.2.dist-info}/LICENSE RENAMED Viewed

File without changes

{nkululeko-0.92.0.dist-info → nkululeko-0.92.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{nkululeko-0.92.0.dist-info → nkululeko-0.92.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{nkululeko-0.92.0.dist-info → nkululeko-0.92.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

nkululeko 0.92.0__py3-none-any.whl → 0.92.2__py3-none-any.whl

nkululeko 0.92.0__py3-none-any.whl → 0.92.2__py3-none-any.whl