nkululeko-0.91.3-py3-none-any.whl → nkululeko-0.92.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/autopredict/ap_sid.py +22 -17
- nkululeko/constants.py +1 -1
- nkululeko/experiment.py +6 -1
- nkululeko/predict.py +1 -1
- nkululeko/segment.py +7 -7
- nkululeko/segmenting/seg_pyannote.py +129 -0
- {nkululeko-0.91.3.dist-info → nkululeko-0.92.0.dist-info}/METADATA +5 -1
- {nkululeko-0.91.3.dist-info → nkululeko-0.92.0.dist-info}/RECORD +12 -11
- {nkululeko-0.91.3.dist-info → nkululeko-0.92.0.dist-info}/LICENSE +0 -0
- {nkululeko-0.91.3.dist-info → nkululeko-0.92.0.dist-info}/WHEEL +0 -0
- {nkululeko-0.91.3.dist-info → nkululeko-0.92.0.dist-info}/entry_points.txt +0 -0
- {nkululeko-0.91.3.dist-info → nkululeko-0.92.0.dist-info}/top_level.txt +0 -0
nkululeko/autopredict/ap_sid.py
CHANGED
@@ -2,13 +2,12 @@
|
|
2
2
|
A predictor for sid - Speaker ID.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from pyannote.audio import Pipeline
|
6
|
-
|
7
|
-
|
8
5
|
import numpy as np
|
6
|
+
from pyannote.audio import Pipeline
|
7
|
+
import torch
|
9
8
|
|
10
|
-
import nkululeko.glob_conf as glob_conf
|
11
9
|
from nkululeko.feature_extractor import FeatureExtractor
|
10
|
+
import nkululeko.glob_conf as glob_conf
|
12
11
|
from nkululeko.utils.util import Util
|
13
12
|
|
14
13
|
|
@@ -21,23 +20,29 @@ class SIDPredictor:
|
|
21
20
|
def __init__(self, df):
|
22
21
|
self.df = df
|
23
22
|
self.util = Util("sidPredictor")
|
23
|
+
hf_token = self.util.config_val("Model", "hf_token", None)
|
24
|
+
if hf_token is None:
|
25
|
+
self.util.error(
|
26
|
+
"speaker id prediction needs huggingface token: [MODEL][hf_token]"
|
27
|
+
)
|
24
28
|
self.pipeline = Pipeline.from_pretrained(
|
25
29
|
"pyannote/speaker-diarization-3.1",
|
26
|
-
use_auth_token=
|
30
|
+
use_auth_token=hf_token,
|
27
31
|
)
|
32
|
+
device = self.util.config_val("Model", "device", "cpu")
|
33
|
+
self.pipeline.to(torch.device(device))
|
28
34
|
|
29
35
|
def predict(self, split_selection):
|
30
|
-
self.util.debug(f"estimating
|
36
|
+
self.util.debug(f"estimating speaker id for {split_selection} samples")
|
31
37
|
return_df = self.df.copy()
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
)
|
36
|
-
|
37
|
-
# replace missing values by 0
|
38
|
-
result_df = result_df.fillna(0)
|
39
|
-
result_df = result_df.replace(np.nan, 0)
|
40
|
-
result_df.replace([np.inf, -np.inf], 0, inplace=True)
|
41
|
-
pred_vals = result_df.pesq * 100
|
42
|
-
return_df["pesq_pred"] = pred_vals.astype("int") / 100
|
38
|
+
# @todo
|
39
|
+
# 1) concat all audio files
|
40
|
+
# 2) get segmentations with pyannote
|
41
|
+
# 3) map pyannote segments with original ones and assign speaker id
|
42
|
+
|
43
43
|
return return_df
|
44
|
+
|
45
|
+
def concat_files(self, df):
|
46
|
+
pass
|
47
|
+
# todo
|
48
|
+
# please use https://audeering.github.io/audiofile/usage.html#read-a-file
|
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.
|
1
|
+
VERSION="0.92.0"
|
2
2
|
SAMPLING_RATE = 16000
|
nkululeko/experiment.py
CHANGED
@@ -439,7 +439,12 @@ class Experiment:
|
|
439
439
|
)
|
440
440
|
targets = self.util.config_val_list("PREDICT", "targets", ["gender"])
|
441
441
|
for target in targets:
|
442
|
-
if target == "
|
442
|
+
if target == "speaker":
|
443
|
+
from nkululeko.autopredict.ap_sid import SIDPredictor
|
444
|
+
|
445
|
+
predictor = SIDPredictor(df)
|
446
|
+
df = predictor.predict(sample_selection)
|
447
|
+
elif target == "gender":
|
443
448
|
from nkululeko.autopredict.ap_gender import GenderPredictor
|
444
449
|
|
445
450
|
predictor = GenderPredictor(df)
|
nkululeko/predict.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# use some model and add automatically predicted labels to train and test splits
|
3
3
|
# then save as a new dataset
|
4
4
|
|
5
|
-
"""This script is used to call the nkululeko PREDICT framework.
|
5
|
+
r"""This script is used to call the nkululeko PREDICT framework.
|
6
6
|
|
7
7
|
It loads a configuration file, creates a new experiment,
|
8
8
|
and performs automatic prediction on the train and test datasets. The predicted labels are added to the datasets and
|
nkululeko/segment.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
"""
|
2
|
-
Segments the samples in the dataset into chunks based on voice activity detection using SILERO VAD [1].
|
1
|
+
"""Segments the samples in the dataset into chunks based on voice activity detection using SILERO VAD [1].
|
3
2
|
|
4
3
|
The segmentation results are saved to a file, and the distributions of the original and
|
5
4
|
segmented durations are plotted.
|
@@ -15,7 +14,7 @@ Example:
|
|
15
14
|
|
16
15
|
References:
|
17
16
|
[1] https://github.com/snakers4/silero-vad
|
18
|
-
|
17
|
+
[2] https://github.com/pyannote/pyannote-audio
|
19
18
|
"""
|
20
19
|
|
21
20
|
import argparse
|
@@ -83,12 +82,15 @@ def main():
|
|
83
82
|
|
84
83
|
segmenter = Silero_segmenter()
|
85
84
|
df_seg = segmenter.segment_dataframe(df)
|
85
|
+
elif segmenter == "pyannote":
|
86
|
+
from nkululeko.segmenting.seg_pyannote import Pyannote_segmenter
|
86
87
|
|
88
|
+
segmenter = Pyannote_segmenter(config)
|
89
|
+
df_seg = segmenter.segment_dataframe(df)
|
87
90
|
else:
|
88
|
-
util.error(f"
|
91
|
+
util.error(f"unknown segmenter: {segmenter}")
|
89
92
|
|
90
93
|
def calc_dur(x):
|
91
|
-
|
92
94
|
starts = x[1]
|
93
95
|
ends = x[2]
|
94
96
|
return (ends - starts).total_seconds()
|
@@ -115,8 +117,6 @@ def main():
|
|
115
117
|
df_seg = df_seg.drop(columns=[target])
|
116
118
|
df_seg = df_seg.rename(columns={"class_label": target})
|
117
119
|
# save file
|
118
|
-
# dataname = "_".join(expr.datasets.keys())
|
119
|
-
# name = f"{dataname}{segment_target}"
|
120
120
|
df_seg.to_csv(f"{expr.data_dir}/{segmented_file}")
|
121
121
|
util.debug(
|
122
122
|
f"saved {segmented_file} to {expr.data_dir}, {num_after} samples (was"
|
@@ -0,0 +1,129 @@
|
|
1
|
+
"""seg_pyannote.py.
|
2
|
+
|
3
|
+
Segment a dataset with the Pyannote segmenter.
|
4
|
+
Also adds speaker ids to the segments.
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import pandas as pd
|
9
|
+
from pyannote.audio import Pipeline
|
10
|
+
import torch
|
11
|
+
from tqdm import tqdm
|
12
|
+
|
13
|
+
import audformat
|
14
|
+
from audformat import segmented_index
|
15
|
+
|
16
|
+
from nkululeko.utils.util import Util
|
17
|
+
|
18
|
+
|
19
|
+
SAMPLING_RATE = 16000
|
20
|
+
|
21
|
+
|
22
|
+
class Pyannote_segmenter:
|
23
|
+
def __init__(self, not_testing=True):
|
24
|
+
# initialize the pyannote speaker diarization pipeline
|
25
|
+
torch.set_num_threads(1)
|
26
|
+
self.no_testing = not_testing
|
27
|
+
self.util = Util("pyannote_segmenter")
|
28
|
+
hf_token = self.util.config_val("MODEL", "hf_token", None)
|
29
|
+
if hf_token is None:
|
30
|
+
self.util.error(
|
31
|
+
"speaker id prediction needs huggingface token: [MODEL][hf_token]"
|
32
|
+
)
|
33
|
+
self.pipeline = Pipeline.from_pretrained(
|
34
|
+
"pyannote/speaker-diarization-3.1",
|
35
|
+
use_auth_token=hf_token,
|
36
|
+
)
|
37
|
+
device = self.util.config_val("MODEL", "device", "cpu")
|
38
|
+
if device == "cpu":
|
39
|
+
self.util.warn(
|
40
|
+
"running pyannote on CPU can be really slow, consider using a GPU"
|
41
|
+
)
|
42
|
+
self.pipeline.to(torch.device(device))
|
43
|
+
|
44
|
+
def get_segmentation_simple(self, file):
|
45
|
+
|
46
|
+
annotation = self.pipeline(file[0])
|
47
|
+
|
48
|
+
speakers, starts, ends, files = [], [], [], []
|
49
|
+
# collect speaker, start, end and file for each speaker turn
|
50
|
+
for turn, _, speaker in annotation.itertracks(yield_label=True):
|
51
|
+
start = turn.start
|
52
|
+
end = turn.end
|
53
|
+
speakers.append(speaker)
|
54
|
+
starts.append(start)
|
55
|
+
files.append(file[0])
|
56
|
+
ends.append(end)
|
57
|
+
seg_index = segmented_index(files, starts, ends)
|
58
|
+
return seg_index, speakers
|
59
|
+
|
60
|
+
def get_segmentation(self, file, min_length, max_length):
|
61
|
+
annotation = self.pipeline(file)
|
62
|
+
files, starts, ends, speakers = [], [], [], []
|
63
|
+
for turn, _, speaker in annotation.itertracks(yield_label=True):
|
64
|
+
start = turn.start
|
65
|
+
end = turn.end
|
66
|
+
new_end = end
|
67
|
+
handled = False
|
68
|
+
while end - start > max_length:
|
69
|
+
new_end = start + max_length
|
70
|
+
if end - new_end < min_length:
|
71
|
+
new_end = end
|
72
|
+
files.append(file[0])
|
73
|
+
starts.append(start)
|
74
|
+
ends.append(new_end)
|
75
|
+
speakers.append(speaker)
|
76
|
+
start += max_length
|
77
|
+
handled = True
|
78
|
+
if not handled and end - start > min_length:
|
79
|
+
files.append(file[0])
|
80
|
+
starts.append(start)
|
81
|
+
ends.append(end)
|
82
|
+
speakers.append(speaker)
|
83
|
+
seg_index = segmented_index(files, starts, ends)
|
84
|
+
return seg_index, speakers
|
85
|
+
|
86
|
+
def segment_dataframe(self, df):
|
87
|
+
dfs = []
|
88
|
+
max_length = eval(self.util.config_val("SEGMENT", "max_length", "False"))
|
89
|
+
if max_length:
|
90
|
+
if self.no_testing:
|
91
|
+
min_length = float(self.util.config_val("SEGMENT", "min_length", 2))
|
92
|
+
else:
|
93
|
+
min_length = 2
|
94
|
+
self.util.debug(f"segmenting with max length: {max_length+min_length}")
|
95
|
+
for file, values in tqdm(df.iterrows()):
|
96
|
+
if max_length:
|
97
|
+
index, speakers = self.get_segmentation(file, min_length, max_length)
|
98
|
+
else:
|
99
|
+
index, speakers = self.get_segmentation_simple(file)
|
100
|
+
df = pd.DataFrame(
|
101
|
+
values.to_dict(),
|
102
|
+
index,
|
103
|
+
)
|
104
|
+
df["speaker"] = speakers
|
105
|
+
dfs.append(df)
|
106
|
+
return audformat.utils.concat(dfs)
|
107
|
+
|
108
|
+
|
109
|
+
def main():
|
110
|
+
files = pd.Series(["test_wavs/very_long.wav"])
|
111
|
+
df_sample = pd.DataFrame(index=files)
|
112
|
+
df_sample["target"] = "anger"
|
113
|
+
df_sample.index = audformat.utils.to_segmented_index(
|
114
|
+
df_sample.index, allow_nat=False
|
115
|
+
)
|
116
|
+
segmenter = Pyannote_segmenter(not_testing=False)
|
117
|
+
df_seg = segmenter.segment_dataframe(df_sample)
|
118
|
+
|
119
|
+
def calc_dur(x):
|
120
|
+
starts = x[1]
|
121
|
+
ends = x[2]
|
122
|
+
return (ends - starts).total_seconds()
|
123
|
+
|
124
|
+
df_seg["duration"] = df_seg.index.to_series().map(lambda x: calc_dur(x))
|
125
|
+
print(df_seg.head(100))
|
126
|
+
|
127
|
+
|
128
|
+
if __name__ == "__main__":
|
129
|
+
main()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.92.0
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -355,6 +355,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
|
|
355
355
|
Changelog
|
356
356
|
=========
|
357
357
|
|
358
|
+
Version 0.92.0
|
359
|
+
--------------
|
360
|
+
* added first version of automatic speaker prediction/segmentation
|
361
|
+
|
358
362
|
Version 0.91.3
|
359
363
|
--------------
|
360
364
|
* some additions for robustness
|
@@ -2,13 +2,13 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
|
2
2
|
nkululeko/aug_train.py,sha256=FoMbBrfyOZd4QAw7oIHl3X6-UpsqAKWVDIolCA7qOWs,3196
|
3
3
|
nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
|
4
4
|
nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
|
5
|
-
nkululeko/constants.py,sha256=
|
5
|
+
nkululeko/constants.py,sha256=trIGnE99KWCznIwZEph-SDuz9A8bzck2v0Md4VgZzMY,39
|
6
6
|
nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
|
7
7
|
nkululeko/demo.py,sha256=4Yzhg6pCPBYPGJrP7JX2TysVosl_R1llpVDKc2P_gUA,4955
|
8
8
|
nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
|
9
9
|
nkululeko/demo_predictor.py,sha256=lDF-xOxRdEAclOmbepAYg-BQXQdGkHfq2n74PTIoop8,4872
|
10
10
|
nkululeko/ensemble.py,sha256=71V-rre61H3J4sh7lu-OTo4I2_g7mm_rQxwW1ARDHgY,12782
|
11
|
-
nkululeko/experiment.py,sha256=
|
11
|
+
nkululeko/experiment.py,sha256=h3DS-k6vk5juXa3HJXI7Z4vvnNspO4qj5SJ1o1Z3PIk,31860
|
12
12
|
nkululeko/explore.py,sha256=Y5lPPychnI-7fyP8zvwVb9P09fvprbUPOofOppuABYQ,3658
|
13
13
|
nkululeko/export.py,sha256=U-V4acxtuL6qKt6oAsVcM5TTeWogYUJ3GU-lA6rq6d4,4336
|
14
14
|
nkululeko/feature_extractor.py,sha256=UnspIWz3XrNhKnBBhWZkH2bHvD-sROtrQVqB1JvkUyw,4088
|
@@ -21,11 +21,11 @@ nkululeko/multidb.py,sha256=sO6OwJn8sn1-C-ig3thsIL8QMWHdV9SnJhDodKjeKrI,6876
|
|
21
21
|
nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
|
22
22
|
nkululeko/nkululeko.py,sha256=M7baIq2nAoi6dEoBL4ATEuqAs5U1fvl_hyqAl5DybAQ,2040
|
23
23
|
nkululeko/plots.py,sha256=sR061gOsyvuh8UBYS52FINSal4CYNQgvq3B4WOSimDw,23092
|
24
|
-
nkululeko/predict.py,sha256=
|
24
|
+
nkululeko/predict.py,sha256=MLnHEyFmSiHLLs-HDczag8Vu3zKF5T1rXLKdZZJ6py8,2083
|
25
25
|
nkululeko/resample.py,sha256=akSAjJ3qn-O5NAyLJHVHdsK7MUZPGaZUvM2TwMSmj2M,5194
|
26
26
|
nkululeko/runmanager.py,sha256=AswmORVUkCIH0gTx6zEyufvFATQBS8C5TXo2erSNdVg,7611
|
27
27
|
nkululeko/scaler.py,sha256=7VOZ4sREMoQtahfETt9RyuR29Fb7PCwxlYVjBbdCVFc,4101
|
28
|
-
nkululeko/segment.py,sha256=
|
28
|
+
nkululeko/segment.py,sha256=CEKfvKrvq-XbciluOkgGLLe7DQO9PLSFGw8rMsFpDVQ,4476
|
29
29
|
nkululeko/syllable_nuclei.py,sha256=5w_naKxNxz66a_qLkraemi2fggM-gWesiiBPS47iFcE,9931
|
30
30
|
nkululeko/test.py,sha256=1w624vo5KTzmFC8BUStGlLDmIEAFuJUz7J0W-gp7AxI,1677
|
31
31
|
nkululeko/test_predictor.py,sha256=DEHE_D3A6m6KJTrpDKceA1n655t_UZV3WQd57K4a3Ho,2863
|
@@ -43,7 +43,7 @@ nkululeko/autopredict/ap_gender.py,sha256=b6oTqHKVwOnYh4YlKbuMflssS4HJqs_c1ayusa
|
|
43
43
|
nkululeko/autopredict/ap_mos.py,sha256=e4hmgb0Yf1_AbC5P0CqXJIvufjhbTrqmI5goARxrY0Y,1107
|
44
44
|
nkululeko/autopredict/ap_pesq.py,sha256=mRt3Loucaoy4vJxwfuxUt0fP88bMGvkmrLCEpKEXWp0,1140
|
45
45
|
nkululeko/autopredict/ap_sdr.py,sha256=VQ2UkxOO3ipqYNNjFwKgEaGCk8IzLI5lX_2tZFLIvTY,1188
|
46
|
-
nkululeko/autopredict/ap_sid.py,sha256=
|
46
|
+
nkululeko/autopredict/ap_sid.py,sha256=87LXMHzJ8jt2q9dUtPJd_nJi_XOcFoqpbva-BT4UJN0,1393
|
47
47
|
nkululeko/autopredict/ap_snr.py,sha256=AiTU8-7CMEeowmYkMO19lw1HCb1yTXC6KeulNf8gOqw,1110
|
48
48
|
nkululeko/autopredict/ap_stoi.py,sha256=UEQg1ZV0meAsxgdWB8ieRs9GPXHqArmsaOyCGRwpcnA,1187
|
49
49
|
nkululeko/autopredict/ap_valence.py,sha256=WrW4Ltqi_odW49_4QEVKkfnrcztLIVZ4cXIEHu4dBN8,1026
|
@@ -106,14 +106,15 @@ nkululeko/reporting/reporter.py,sha256=4OlYZAParkfJKO_aAyxqVpLc21zxZ-jDhtJKIMeUs
|
|
106
106
|
nkululeko/reporting/result.py,sha256=G63a2tHCwHhM6NBJgYzsWKWJm4Yu3r4hsCHA2Km7eHU,1073
|
107
107
|
nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
108
108
|
nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=b3t0zdpJYofKWMyKRMtMMX91xeR-k8d5pbnNaQHcsOE,1902
|
109
|
+
nkululeko/segmenting/seg_pyannote.py,sha256=6IPbgjnGOz9juzEKDTZN3PSipX4t6Mz-DILAx3rp5do,4216
|
109
110
|
nkululeko/segmenting/seg_silero.py,sha256=ulodnvtRq5MLHDxy_RmAK4tJg6h1d-mPq-uCPFkGVKg,4258
|
110
111
|
nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
111
112
|
nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
|
112
113
|
nkululeko/utils/stats.py,sha256=vCRzhCR0Gx5SiJyAGbj1TIto8ocGz58CM5Pr3LltagA,2948
|
113
114
|
nkululeko/utils/util.py,sha256=XFZdhCc_LM4EmoZ5tKKaBCQLXclcNmvHwhfT_CXB98c,16723
|
114
|
-
nkululeko-0.
|
115
|
-
nkululeko-0.
|
116
|
-
nkululeko-0.
|
117
|
-
nkululeko-0.
|
118
|
-
nkululeko-0.
|
119
|
-
nkululeko-0.
|
115
|
+
nkululeko-0.92.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
116
|
+
nkululeko-0.92.0.dist-info/METADATA,sha256=-So3jBO4lGif0bmb4KgDxFV4p-EyR7u1eejB8mEhotA,41682
|
117
|
+
nkululeko-0.92.0.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
118
|
+
nkululeko-0.92.0.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
|
119
|
+
nkululeko-0.92.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
120
|
+
nkululeko-0.92.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|