nkululeko 0.92.0__py3-none-any.whl → 0.92.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +1 -1
- nkululeko/plots.py +27 -4
- nkululeko/segment.py +38 -25
- {nkululeko-0.92.0.dist-info → nkululeko-0.92.2.dist-info}/METADATA +9 -1
- {nkululeko-0.92.0.dist-info → nkululeko-0.92.2.dist-info}/RECORD +9 -9
- {nkululeko-0.92.0.dist-info → nkululeko-0.92.2.dist-info}/LICENSE +0 -0
- {nkululeko-0.92.0.dist-info → nkululeko-0.92.2.dist-info}/WHEEL +0 -0
- {nkululeko-0.92.0.dist-info → nkululeko-0.92.2.dist-info}/entry_points.txt +0 -0
- {nkululeko-0.92.0.dist-info → nkululeko-0.92.2.dist-info}/top_level.txt +0 -0
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.92.0"
|
1
|
+
VERSION="0.92.2"
|
2
2
|
SAMPLING_RATE = 16000
|
nkululeko/plots.py
CHANGED
@@ -4,14 +4,14 @@ import ast
|
|
4
4
|
import matplotlib.pyplot as plt
|
5
5
|
import numpy as np
|
6
6
|
import pandas as pd
|
7
|
-
import seaborn as sns
|
8
7
|
from scipy import stats
|
8
|
+
import seaborn as sns
|
9
9
|
from sklearn.manifold import TSNE
|
10
10
|
|
11
11
|
import nkululeko.glob_conf as glob_conf
|
12
|
-
import nkululeko.utils.stats as su
|
13
12
|
from nkululeko.reporting.defines import Header
|
14
13
|
from nkululeko.reporting.report_item import ReportItem
|
14
|
+
import nkululeko.utils.stats as su
|
15
15
|
from nkululeko.utils.util import Util
|
16
16
|
|
17
17
|
|
@@ -30,8 +30,6 @@ class Plots:
|
|
30
30
|
df_speaker["samplenum"] = df_speaker.shape[0]
|
31
31
|
df_speakers = pd.concat([df_speakers, df_speaker.head(1)])
|
32
32
|
# plot the distribution of samples per speaker
|
33
|
-
# one up because of the runs
|
34
|
-
fig_dir = self.util.get_path("fig_dir") + "../"
|
35
33
|
self.util.debug("plotting samples per speaker")
|
36
34
|
if "gender" in df_speakers:
|
37
35
|
filename = "samples_value_counts"
|
@@ -319,6 +317,31 @@ class Plots:
|
|
319
317
|
img_path = f"{fig_dir}{filename}_{sample_selection}.{self.format}"
|
320
318
|
plt.savefig(img_path)
|
321
319
|
plt.close(fig)
|
320
|
+
self.util.debug(f"plotted durations to {img_path}")
|
321
|
+
glob_conf.report.add_item(
|
322
|
+
ReportItem(
|
323
|
+
Header.HEADER_EXPLORE,
|
324
|
+
caption,
|
325
|
+
title,
|
326
|
+
img_path,
|
327
|
+
)
|
328
|
+
)
|
329
|
+
|
330
|
+
def plot_speakers(self, df, sample_selection):
|
331
|
+
filename = "speakers"
|
332
|
+
caption = "speakers"
|
333
|
+
# one up because of the runs
|
334
|
+
fig_dir = self.util.get_path("fig_dir") + "../"
|
335
|
+
sns.set_style("whitegrid") # Set style for chart
|
336
|
+
ax = df["speaker"].value_counts().plot(kind="pie", autopct="%1.1f%%")
|
337
|
+
title = f"Speaker distr. for {sample_selection} {df.shape[0]}."
|
338
|
+
ax.set_title(title)
|
339
|
+
fig = ax.figure
|
340
|
+
# plt.tight_layout()
|
341
|
+
img_path = f"{fig_dir}{filename}_{sample_selection}.{self.format}"
|
342
|
+
plt.savefig(img_path)
|
343
|
+
plt.close(fig)
|
344
|
+
self.util.debug(f"plotted speakers to {img_path}")
|
322
345
|
glob_conf.report.add_item(
|
323
346
|
ReportItem(
|
324
347
|
Header.HEADER_EXPLORE,
|
nkululeko/segment.py
CHANGED
@@ -23,6 +23,8 @@ import os
|
|
23
23
|
|
24
24
|
import pandas as pd
|
25
25
|
|
26
|
+
import audformat
|
27
|
+
|
26
28
|
from nkululeko.constants import VERSION
|
27
29
|
from nkululeko.experiment import Experiment
|
28
30
|
import nkululeko.glob_conf as glob_conf
|
@@ -63,7 +65,7 @@ def main():
|
|
63
65
|
# segment
|
64
66
|
segmented_file = util.config_val("SEGMENT", "result", "segmented.csv")
|
65
67
|
|
66
|
-
|
68
|
+
method = util.config_val("SEGMENT", "method", "silero")
|
67
69
|
sample_selection = util.config_val("SEGMENT", "sample_selection", "all")
|
68
70
|
if sample_selection == "all":
|
69
71
|
df = pd.concat([expr.df_train, expr.df_test])
|
@@ -76,19 +78,33 @@ def main():
|
|
76
78
|
f"unknown segmentation selection specifier {sample_selection},"
|
77
79
|
" should be [all | train | test]"
|
78
80
|
)
|
79
|
-
|
80
|
-
if
|
81
|
-
|
82
|
-
|
83
|
-
segmenter = Silero_segmenter()
|
84
|
-
df_seg = segmenter.segment_dataframe(df)
|
85
|
-
elif segmenter == "pyannote":
|
86
|
-
from nkululeko.segmenting.seg_pyannote import Pyannote_segmenter
|
87
|
-
|
88
|
-
segmenter = Pyannote_segmenter(config)
|
89
|
-
df_seg = segmenter.segment_dataframe(df)
|
81
|
+
result_file = f"{expr.data_dir}/{segmented_file}"
|
82
|
+
if os.path.exists(result_file):
|
83
|
+
util.debug(f"reusing existing result file: {result_file}")
|
84
|
+
df_seg = audformat.utils.read_csv(result_file)
|
90
85
|
else:
|
91
|
-
util.
|
86
|
+
util.debug(
|
87
|
+
f"segmenting {sample_selection}: {df.shape[0]} samples with {method}"
|
88
|
+
)
|
89
|
+
if method == "silero":
|
90
|
+
from nkululeko.segmenting.seg_silero import Silero_segmenter
|
91
|
+
|
92
|
+
segmenter = Silero_segmenter()
|
93
|
+
df_seg = segmenter.segment_dataframe(df)
|
94
|
+
elif method == "pyannote":
|
95
|
+
from nkululeko.segmenting.seg_pyannote import Pyannote_segmenter
|
96
|
+
|
97
|
+
segmenter = Pyannote_segmenter(config)
|
98
|
+
df_seg = segmenter.segment_dataframe(df)
|
99
|
+
else:
|
100
|
+
util.error(f"unknown segmenter: {method}")
|
101
|
+
# remove encoded labels
|
102
|
+
target = util.config_val("DATA", "target", None)
|
103
|
+
if "class_label" in df_seg.columns:
|
104
|
+
df_seg = df_seg.drop(columns=[target])
|
105
|
+
df_seg = df_seg.rename(columns={"class_label": target})
|
106
|
+
# save file
|
107
|
+
df_seg.to_csv(f"{expr.data_dir}/{segmented_file}")
|
92
108
|
|
93
109
|
def calc_dur(x):
|
94
110
|
starts = x[1]
|
@@ -100,6 +116,11 @@ def main():
|
|
100
116
|
df_seg["duration"] = df_seg.index.to_series().map(lambda x: calc_dur(x))
|
101
117
|
num_before = df.shape[0]
|
102
118
|
num_after = df_seg.shape[0]
|
119
|
+
util.debug(
|
120
|
+
f"saved {segmented_file} to {expr.data_dir}, {num_after} samples (was"
|
121
|
+
f" {num_before})"
|
122
|
+
)
|
123
|
+
|
103
124
|
# plot distributions
|
104
125
|
from nkululeko.plots import Plots
|
105
126
|
|
@@ -110,18 +131,10 @@ def main():
|
|
110
131
|
plots.plot_durations(
|
111
132
|
df_seg, "segmented_durations", sample_selection, caption="Segmented durations"
|
112
133
|
)
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
df_seg = df_seg.drop(columns=[target])
|
118
|
-
df_seg = df_seg.rename(columns={"class_label": target})
|
119
|
-
# save file
|
120
|
-
df_seg.to_csv(f"{expr.data_dir}/{segmented_file}")
|
121
|
-
util.debug(
|
122
|
-
f"saved {segmented_file} to {expr.data_dir}, {num_after} samples (was"
|
123
|
-
f" {num_before})"
|
124
|
-
)
|
134
|
+
if method == "pyannote":
|
135
|
+
util.debug(df_seg[["speaker", "duration"]].groupby(["speaker"]).sum())
|
136
|
+
plots.plot_speakers(df_seg, sample_selection)
|
137
|
+
|
125
138
|
glob_conf.report.add_item(
|
126
139
|
ReportItem(
|
127
140
|
"Data",
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.92.0
|
3
|
+
Version: 0.92.2
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -355,6 +355,14 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
|
|
355
355
|
Changelog
|
356
356
|
=========
|
357
357
|
|
358
|
+
Version 0.92.2
|
359
|
+
--------------
|
360
|
+
* added some output to automatic speaker id
|
361
|
+
|
362
|
+
Version 0.92.1
|
363
|
+
--------------
|
364
|
+
* added a speaker plot to pyannote results
|
365
|
+
|
358
366
|
Version 0.92.0
|
359
367
|
--------------
|
360
368
|
* added first version of automatic speaker prediction/segmentation
|
@@ -2,7 +2,7 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
|
2
2
|
nkululeko/aug_train.py,sha256=FoMbBrfyOZd4QAw7oIHl3X6-UpsqAKWVDIolCA7qOWs,3196
|
3
3
|
nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
|
4
4
|
nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
|
5
|
-
nkululeko/constants.py,sha256=
|
5
|
+
nkululeko/constants.py,sha256=HBBuhT6kpIHhRMiSBkU07cszGO7kO2A8qTYrN6zH9rw,39
|
6
6
|
nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
|
7
7
|
nkululeko/demo.py,sha256=4Yzhg6pCPBYPGJrP7JX2TysVosl_R1llpVDKc2P_gUA,4955
|
8
8
|
nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
|
@@ -20,12 +20,12 @@ nkululeko/modelrunner.py,sha256=lJy-xM4QfDDWeL0dLTE_VIb4sYrnd_Z_yJRK3wwohQA,1119
|
|
20
20
|
nkululeko/multidb.py,sha256=sO6OwJn8sn1-C-ig3thsIL8QMWHdV9SnJhDodKjeKrI,6876
|
21
21
|
nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
|
22
22
|
nkululeko/nkululeko.py,sha256=M7baIq2nAoi6dEoBL4ATEuqAs5U1fvl_hyqAl5DybAQ,2040
|
23
|
-
nkululeko/plots.py,sha256=
|
23
|
+
nkululeko/plots.py,sha256=dK3jVwsZufqXgHwAvDYt6uDg_KYk5cfxlP1Fo8kb9HA,23935
|
24
24
|
nkululeko/predict.py,sha256=MLnHEyFmSiHLLs-HDczag8Vu3zKF5T1rXLKdZZJ6py8,2083
|
25
25
|
nkululeko/resample.py,sha256=akSAjJ3qn-O5NAyLJHVHdsK7MUZPGaZUvM2TwMSmj2M,5194
|
26
26
|
nkululeko/runmanager.py,sha256=AswmORVUkCIH0gTx6zEyufvFATQBS8C5TXo2erSNdVg,7611
|
27
27
|
nkululeko/scaler.py,sha256=7VOZ4sREMoQtahfETt9RyuR29Fb7PCwxlYVjBbdCVFc,4101
|
28
|
-
nkululeko/segment.py,sha256=
|
28
|
+
nkululeko/segment.py,sha256=DRjC6b7SeInYgwBcDPXpTXPvXPS-J8kFQO7H095bK80,4945
|
29
29
|
nkululeko/syllable_nuclei.py,sha256=5w_naKxNxz66a_qLkraemi2fggM-gWesiiBPS47iFcE,9931
|
30
30
|
nkululeko/test.py,sha256=1w624vo5KTzmFC8BUStGlLDmIEAFuJUz7J0W-gp7AxI,1677
|
31
31
|
nkululeko/test_predictor.py,sha256=DEHE_D3A6m6KJTrpDKceA1n655t_UZV3WQd57K4a3Ho,2863
|
@@ -112,9 +112,9 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
112
112
|
nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
|
113
113
|
nkululeko/utils/stats.py,sha256=vCRzhCR0Gx5SiJyAGbj1TIto8ocGz58CM5Pr3LltagA,2948
|
114
114
|
nkululeko/utils/util.py,sha256=XFZdhCc_LM4EmoZ5tKKaBCQLXclcNmvHwhfT_CXB98c,16723
|
115
|
-
nkululeko-0.92.
|
116
|
-
nkululeko-0.92.
|
117
|
-
nkululeko-0.92.
|
118
|
-
nkululeko-0.92.
|
119
|
-
nkululeko-0.92.
|
120
|
-
nkululeko-0.92.
|
115
|
+
nkululeko-0.92.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
116
|
+
nkululeko-0.92.2.dist-info/METADATA,sha256=pwdxFGECc-W2WdmnXxgJz6Jy3CbvwzeHASfu7WxFK7g,41832
|
117
|
+
nkululeko-0.92.2.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
118
|
+
nkululeko-0.92.2.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
|
119
|
+
nkululeko-0.92.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
120
|
+
nkululeko-0.92.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|