nkululeko 0.91.3__py3-none-any.whl → 0.92.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,13 +2,12 @@
2
2
  A predictor for sid - Speaker ID.
3
3
  """
4
4
 
5
- from pyannote.audio import Pipeline
6
-
7
-
8
5
  import numpy as np
6
+ from pyannote.audio import Pipeline
7
+ import torch
9
8
 
10
- import nkululeko.glob_conf as glob_conf
11
9
  from nkululeko.feature_extractor import FeatureExtractor
10
+ import nkululeko.glob_conf as glob_conf
12
11
  from nkululeko.utils.util import Util
13
12
 
14
13
 
@@ -21,23 +20,29 @@ class SIDPredictor:
21
20
  def __init__(self, df):
22
21
  self.df = df
23
22
  self.util = Util("sidPredictor")
23
+ hf_token = self.util.config_val("Model", "hf_token", None)
24
+ if hf_token is None:
25
+ self.util.error(
26
+ "speaker id prediction needs huggingface token: [MODEL][hf_token]"
27
+ )
24
28
  self.pipeline = Pipeline.from_pretrained(
25
29
  "pyannote/speaker-diarization-3.1",
26
- use_auth_token="HUGGINGFACE_ACCESS_TOKEN_GOES_HERE",
30
+ use_auth_token=hf_token,
27
31
  )
32
+ device = self.util.config_val("Model", "device", "cpu")
33
+ self.pipeline.to(torch.device(device))
28
34
 
29
35
  def predict(self, split_selection):
30
- self.util.debug(f"estimating PESQ for {split_selection} samples")
36
+ self.util.debug(f"estimating speaker id for {split_selection} samples")
31
37
  return_df = self.df.copy()
32
- feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
33
- self.feature_extractor = FeatureExtractor(
34
- self.df, ["squim"], feats_name, split_selection
35
- )
36
- result_df = self.feature_extractor.extract()
37
- # replace missing values by 0
38
- result_df = result_df.fillna(0)
39
- result_df = result_df.replace(np.nan, 0)
40
- result_df.replace([np.inf, -np.inf], 0, inplace=True)
41
- pred_vals = result_df.pesq * 100
42
- return_df["pesq_pred"] = pred_vals.astype("int") / 100
38
+ # @todo
39
+ # 1) concat all audio files
40
+ # 2) get segmentations with pyannote
41
+ # 3) map pyannote segments with original ones and assign speaker id
42
+
43
43
  return return_df
44
+
45
+ def concat_files(self, df):
46
+ pass
47
+ # todo
48
+ # please use https://audeering.github.io/audiofile/usage.html#read-a-file
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.91.3"
1
+ VERSION="0.92.0"
2
2
  SAMPLING_RATE = 16000
nkululeko/experiment.py CHANGED
@@ -439,7 +439,12 @@ class Experiment:
439
439
  )
440
440
  targets = self.util.config_val_list("PREDICT", "targets", ["gender"])
441
441
  for target in targets:
442
- if target == "gender":
442
+ if target == "speaker":
443
+ from nkululeko.autopredict.ap_sid import SIDPredictor
444
+
445
+ predictor = SIDPredictor(df)
446
+ df = predictor.predict(sample_selection)
447
+ elif target == "gender":
443
448
  from nkululeko.autopredict.ap_gender import GenderPredictor
444
449
 
445
450
  predictor = GenderPredictor(df)
nkululeko/predict.py CHANGED
@@ -2,7 +2,7 @@
2
2
  # use some model and add automatically predicted labels to train and test splits
3
3
  # then save as a new dataset
4
4
 
5
- """This script is used to call the nkululeko PREDICT framework.
5
+ r"""This script is used to call the nkululeko PREDICT framework.
6
6
 
7
7
  It loads a configuration file, creates a new experiment,
8
8
  and performs automatic prediction on the train and test datasets. The predicted labels are added to the datasets and
nkululeko/segment.py CHANGED
@@ -1,5 +1,4 @@
1
- """
2
- Segments the samples in the dataset into chunks based on voice activity detection using SILERO VAD [1].
1
+ """Segments the samples in the dataset into chunks based on voice activity detection using SILERO VAD [1].
3
2
 
4
3
  The segmentation results are saved to a file, and the distributions of the original and
5
4
  segmented durations are plotted.
@@ -15,7 +14,7 @@ Example:
15
14
 
16
15
  References:
17
16
  [1] https://github.com/snakers4/silero-vad
18
-
17
+ [2] https://github.com/pyannote/pyannote-audio
19
18
  """
20
19
 
21
20
  import argparse
@@ -83,12 +82,15 @@ def main():
83
82
 
84
83
  segmenter = Silero_segmenter()
85
84
  df_seg = segmenter.segment_dataframe(df)
85
+ elif segmenter == "pyannote":
86
+ from nkululeko.segmenting.seg_pyannote import Pyannote_segmenter
86
87
 
88
+ segmenter = Pyannote_segmenter(config)
89
+ df_seg = segmenter.segment_dataframe(df)
87
90
  else:
88
- util.error(f"unkown segmenter: {segmenter}")
91
+ util.error(f"unknown segmenter: {segmenter}")
89
92
 
90
93
  def calc_dur(x):
91
-
92
94
  starts = x[1]
93
95
  ends = x[2]
94
96
  return (ends - starts).total_seconds()
@@ -115,8 +117,6 @@ def main():
115
117
  df_seg = df_seg.drop(columns=[target])
116
118
  df_seg = df_seg.rename(columns={"class_label": target})
117
119
  # save file
118
- # dataname = "_".join(expr.datasets.keys())
119
- # name = f"{dataname}{segment_target}"
120
120
  df_seg.to_csv(f"{expr.data_dir}/{segmented_file}")
121
121
  util.debug(
122
122
  f"saved {segmented_file} to {expr.data_dir}, {num_after} samples (was"
@@ -0,0 +1,129 @@
1
+ """seg_pyannote.py.
2
+
3
+ Segment a dataset with the Pyannote segmenter.
4
+ Also adds speaker ids to the segments.
5
+
6
+ """
7
+
8
+ import pandas as pd
9
+ from pyannote.audio import Pipeline
10
+ import torch
11
+ from tqdm import tqdm
12
+
13
+ import audformat
14
+ from audformat import segmented_index
15
+
16
+ from nkululeko.utils.util import Util
17
+
18
+
19
+ SAMPLING_RATE = 16000
20
+
21
+
22
class Pyannote_segmenter:
    """Segment audio files into speaker turns with pyannote speaker diarization.

    Each resulting segment is additionally labelled with the speaker id that
    the diarization pipeline assigned to it.
    """

    def __init__(self, not_testing=True):
        """Load the diarization pipeline and move it to the configured device.

        Args:
            not_testing: if truthy, ``segment_dataframe`` reads ``min_length``
                from the ``[SEGMENT]`` configuration; if falsy a fixed value
                of 2 is used. Any truthy object (e.g. a config) selects the
                configuration-driven behaviour.

        The Hugging Face token is read from ``[MODEL][hf_token]``; if it is
        missing ``self.util.error`` is called. The device is read from
        ``[MODEL][device]`` and defaults to ``"cpu"``.
        """
        torch.set_num_threads(1)
        self.no_testing = not_testing
        self.util = Util("pyannote_segmenter")
        hf_token = self.util.config_val("MODEL", "hf_token", None)
        if hf_token is None:
            self.util.error(
                "speaker id prediction needs huggingface token: [MODEL][hf_token]"
            )
        self.pipeline = Pipeline.from_pretrained(
            "pyannote/speaker-diarization-3.1",
            use_auth_token=hf_token,
        )
        device = self.util.config_val("MODEL", "device", "cpu")
        if device == "cpu":
            self.util.warn(
                "running pyannote on CPU can be really slow, consider using a GPU"
            )
        self.pipeline.to(torch.device(device))

    @staticmethod
    def _split_turn(start, end, min_length, max_length):
        """Cut one speaker turn into chunks of at most ``max_length`` seconds.

        Args:
            start: turn start in seconds.
            end: turn end in seconds.
            min_length: a trailing rest shorter than this is merged into the
                preceding chunk; an unsplit turn is only kept if it is longer
                than this.
            max_length: target chunk length in seconds.

        Returns:
            List of ``(start, end)`` tuples covering the turn. The last chunk
            may be up to ``max_length + min_length`` long. The list is empty
            if the turn fits into ``max_length`` but is not longer than
            ``min_length``.
        """
        if end - start <= max_length:
            # turn needs no splitting: keep it unless it is too short
            return [(start, end)] if end - start > min_length else []
        chunks = []
        while end - start > max_length:
            new_end = start + max_length
            if end - new_end < min_length:
                # rest would be too short to stand alone: merge it and stop,
                # so that no overlapping chunk can be emitted afterwards
                chunks.append((start, end))
                return chunks
            chunks.append((start, new_end))
            start = new_end
        # the rest is at least min_length and at most max_length long here;
        # keep it instead of silently dropping the end of the turn
        chunks.append((start, end))
        return chunks

    def get_segmentation_simple(self, file):
        """Diarize one file and return every speaker turn unchanged.

        Args:
            file: index entry whose first element is the audio file path.

        Returns:
            Tuple ``(seg_index, speakers)``: a segmented index with one entry
            per speaker turn and the list of speaker labels in the same order.
        """
        path = file[0]
        annotation = self.pipeline(path)
        starts, ends, speakers = [], [], []
        for turn, _, speaker in annotation.itertracks(yield_label=True):
            starts.append(turn.start)
            ends.append(turn.end)
            speakers.append(speaker)
        seg_index = segmented_index([path] * len(starts), starts, ends)
        return seg_index, speakers

    def get_segmentation(self, file, min_length, max_length):
        """Diarize one file and split long speaker turns into chunks.

        Args:
            file: index entry whose first element is the audio file path.
            min_length: minimum segment length in seconds.
            max_length: target maximum segment length in seconds.

        Returns:
            Tuple ``(seg_index, speakers)``: a segmented index with one entry
            per chunk and the list of speaker labels in the same order.
        """
        # pass the path only (not the whole index tuple), consistent with
        # get_segmentation_simple
        path = file[0]
        annotation = self.pipeline(path)
        files, starts, ends, speakers = [], [], [], []
        for turn, _, speaker in annotation.itertracks(yield_label=True):
            for chunk_start, chunk_end in self._split_turn(
                turn.start, turn.end, min_length, max_length
            ):
                files.append(path)
                starts.append(chunk_start)
                ends.append(chunk_end)
                speakers.append(speaker)
        seg_index = segmented_index(files, starts, ends)
        return seg_index, speakers

    def segment_dataframe(self, df):
        """Segment all files of a dataframe and add a ``speaker`` column.

        Args:
            df: dataframe whose index entries carry the file path as their
                first element; the row values are copied to every segment
                derived from that row.

        Returns:
            The concatenation (``audformat.utils.concat``) of the per-file
            segment dataframes.
        """
        # NOTE(review): eval() on a configuration value executes arbitrary
        # code; kept for compatibility, consider ast.literal_eval.
        max_length = eval(self.util.config_val("SEGMENT", "max_length", "False"))
        min_length = None
        if max_length:
            if self.no_testing:
                min_length = float(self.util.config_val("SEGMENT", "min_length", 2))
            else:
                min_length = 2
            self.util.debug(f"segmenting with max length: {max_length+min_length}")
        dfs = []
        for file, values in tqdm(df.iterrows(), total=len(df)):
            if max_length:
                index, speakers = self.get_segmentation(file, min_length, max_length)
            else:
                index, speakers = self.get_segmentation_simple(file)
            # use a separate name so the input dataframe is not rebound
            seg_df = pd.DataFrame(
                values.to_dict(),
                index,
            )
            seg_df["speaker"] = speakers
            dfs.append(seg_df)
        return audformat.utils.concat(dfs)
107
+
108
+
109
def main():
    """Demo run: segment one test file and print the resulting segments.

    Builds a one-row dataframe for ``test_wavs/very_long.wav``, segments it
    with a ``Pyannote_segmenter`` in testing mode, adds a ``duration`` column
    (in seconds) and prints the first 100 rows.
    """

    def calc_dur(x):
        # x is one index entry; elements 1 and 2 are the segment start and end
        seg_start, seg_end = x[1], x[2]
        return (seg_end - seg_start).total_seconds()

    wav_files = pd.Series(["test_wavs/very_long.wav"])
    df_sample = pd.DataFrame(index=wav_files)
    df_sample["target"] = "anger"
    df_sample.index = audformat.utils.to_segmented_index(
        df_sample.index, allow_nat=False
    )

    df_seg = Pyannote_segmenter(not_testing=False).segment_dataframe(df_sample)
    df_seg["duration"] = df_seg.index.to_series().map(calc_dur)
    print(df_seg.head(100))


if __name__ == "__main__":
    main()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.91.3
3
+ Version: 0.92.0
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -355,6 +355,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
355
355
  Changelog
356
356
  =========
357
357
 
358
+ Version 0.92.0
359
+ --------------
360
+ * added first version of automatic speaker prediction/segmentation
361
+
358
362
  Version 0.91.3
359
363
  --------------
360
364
  * some additions for robustness
@@ -2,13 +2,13 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=FoMbBrfyOZd4QAw7oIHl3X6-UpsqAKWVDIolCA7qOWs,3196
3
3
  nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
4
4
  nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
5
- nkululeko/constants.py,sha256=tT5Su7q7ufkiVtsEMOV5FgJVkE2U8idmrVxrNi4gFmc,39
5
+ nkululeko/constants.py,sha256=trIGnE99KWCznIwZEph-SDuz9A8bzck2v0Md4VgZzMY,39
6
6
  nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
7
7
  nkululeko/demo.py,sha256=4Yzhg6pCPBYPGJrP7JX2TysVosl_R1llpVDKc2P_gUA,4955
8
8
  nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
9
9
  nkululeko/demo_predictor.py,sha256=lDF-xOxRdEAclOmbepAYg-BQXQdGkHfq2n74PTIoop8,4872
10
10
  nkululeko/ensemble.py,sha256=71V-rre61H3J4sh7lu-OTo4I2_g7mm_rQxwW1ARDHgY,12782
11
- nkululeko/experiment.py,sha256=kRz3diGyupXneXFBLmmUm6BV-sGSqQJv44Zojn0Bhcs,31649
11
+ nkululeko/experiment.py,sha256=h3DS-k6vk5juXa3HJXI7Z4vvnNspO4qj5SJ1o1Z3PIk,31860
12
12
  nkululeko/explore.py,sha256=Y5lPPychnI-7fyP8zvwVb9P09fvprbUPOofOppuABYQ,3658
13
13
  nkululeko/export.py,sha256=U-V4acxtuL6qKt6oAsVcM5TTeWogYUJ3GU-lA6rq6d4,4336
14
14
  nkululeko/feature_extractor.py,sha256=UnspIWz3XrNhKnBBhWZkH2bHvD-sROtrQVqB1JvkUyw,4088
@@ -21,11 +21,11 @@ nkululeko/multidb.py,sha256=sO6OwJn8sn1-C-ig3thsIL8QMWHdV9SnJhDodKjeKrI,6876
21
21
  nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
22
22
  nkululeko/nkululeko.py,sha256=M7baIq2nAoi6dEoBL4ATEuqAs5U1fvl_hyqAl5DybAQ,2040
23
23
  nkululeko/plots.py,sha256=sR061gOsyvuh8UBYS52FINSal4CYNQgvq3B4WOSimDw,23092
24
- nkululeko/predict.py,sha256=b35YOqovGb5PLDz0nDuhJGykEAPq2Y45R9lzxJZMuMU,2083
24
+ nkululeko/predict.py,sha256=MLnHEyFmSiHLLs-HDczag8Vu3zKF5T1rXLKdZZJ6py8,2083
25
25
  nkululeko/resample.py,sha256=akSAjJ3qn-O5NAyLJHVHdsK7MUZPGaZUvM2TwMSmj2M,5194
26
26
  nkululeko/runmanager.py,sha256=AswmORVUkCIH0gTx6zEyufvFATQBS8C5TXo2erSNdVg,7611
27
27
  nkululeko/scaler.py,sha256=7VOZ4sREMoQtahfETt9RyuR29Fb7PCwxlYVjBbdCVFc,4101
28
- nkululeko/segment.py,sha256=PAc5sVLoqKzOVENd9A5ybaKrJOvD9WEPwDdJGTv6OIM,4319
28
+ nkululeko/segment.py,sha256=CEKfvKrvq-XbciluOkgGLLe7DQO9PLSFGw8rMsFpDVQ,4476
29
29
  nkululeko/syllable_nuclei.py,sha256=5w_naKxNxz66a_qLkraemi2fggM-gWesiiBPS47iFcE,9931
30
30
  nkululeko/test.py,sha256=1w624vo5KTzmFC8BUStGlLDmIEAFuJUz7J0W-gp7AxI,1677
31
31
  nkululeko/test_predictor.py,sha256=DEHE_D3A6m6KJTrpDKceA1n655t_UZV3WQd57K4a3Ho,2863
@@ -43,7 +43,7 @@ nkululeko/autopredict/ap_gender.py,sha256=b6oTqHKVwOnYh4YlKbuMflssS4HJqs_c1ayusa
43
43
  nkululeko/autopredict/ap_mos.py,sha256=e4hmgb0Yf1_AbC5P0CqXJIvufjhbTrqmI5goARxrY0Y,1107
44
44
  nkululeko/autopredict/ap_pesq.py,sha256=mRt3Loucaoy4vJxwfuxUt0fP88bMGvkmrLCEpKEXWp0,1140
45
45
  nkululeko/autopredict/ap_sdr.py,sha256=VQ2UkxOO3ipqYNNjFwKgEaGCk8IzLI5lX_2tZFLIvTY,1188
46
- nkululeko/autopredict/ap_sid.py,sha256=1TYJg-Bg_LJGPzIWF-oYtXmD5Otvi0fW_f8uzEVZY5g,1309
46
+ nkululeko/autopredict/ap_sid.py,sha256=87LXMHzJ8jt2q9dUtPJd_nJi_XOcFoqpbva-BT4UJN0,1393
47
47
  nkululeko/autopredict/ap_snr.py,sha256=AiTU8-7CMEeowmYkMO19lw1HCb1yTXC6KeulNf8gOqw,1110
48
48
  nkululeko/autopredict/ap_stoi.py,sha256=UEQg1ZV0meAsxgdWB8ieRs9GPXHqArmsaOyCGRwpcnA,1187
49
49
  nkululeko/autopredict/ap_valence.py,sha256=WrW4Ltqi_odW49_4QEVKkfnrcztLIVZ4cXIEHu4dBN8,1026
@@ -106,14 +106,15 @@ nkululeko/reporting/reporter.py,sha256=4OlYZAParkfJKO_aAyxqVpLc21zxZ-jDhtJKIMeUs
106
106
  nkululeko/reporting/result.py,sha256=G63a2tHCwHhM6NBJgYzsWKWJm4Yu3r4hsCHA2Km7eHU,1073
107
107
  nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
108
108
  nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=b3t0zdpJYofKWMyKRMtMMX91xeR-k8d5pbnNaQHcsOE,1902
109
+ nkululeko/segmenting/seg_pyannote.py,sha256=6IPbgjnGOz9juzEKDTZN3PSipX4t6Mz-DILAx3rp5do,4216
109
110
  nkululeko/segmenting/seg_silero.py,sha256=ulodnvtRq5MLHDxy_RmAK4tJg6h1d-mPq-uCPFkGVKg,4258
110
111
  nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
111
112
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
112
113
  nkululeko/utils/stats.py,sha256=vCRzhCR0Gx5SiJyAGbj1TIto8ocGz58CM5Pr3LltagA,2948
113
114
  nkululeko/utils/util.py,sha256=XFZdhCc_LM4EmoZ5tKKaBCQLXclcNmvHwhfT_CXB98c,16723
114
- nkululeko-0.91.3.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
115
- nkululeko-0.91.3.dist-info/METADATA,sha256=FI1hcOEFJbfAHNQi1SCFhPv7sC06vdWa75lIw2Ix4YA,41584
116
- nkululeko-0.91.3.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
117
- nkululeko-0.91.3.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
118
- nkululeko-0.91.3.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
119
- nkululeko-0.91.3.dist-info/RECORD,,
115
+ nkululeko-0.92.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
116
+ nkululeko-0.92.0.dist-info/METADATA,sha256=-So3jBO4lGif0bmb4KgDxFV4p-EyR7u1eejB8mEhotA,41682
117
+ nkululeko-0.92.0.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
118
+ nkululeko-0.92.0.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
119
+ nkululeko-0.92.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
120
+ nkululeko-0.92.0.dist-info/RECORD,,