nkululeko 0.92.2__py3-none-any.whl → 0.93.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/autopredict/ap_sid.py +37 -9
- nkululeko/constants.py +1 -1
- nkululeko/data/dataset_csv.py +4 -4
- nkululeko/experiment.py +1 -3
- nkululeko/feat_extract/feats_wav2vec2.py +5 -4
- nkululeko/feat_extract/feats_wavlm.py +3 -2
- nkululeko/utils/files.py +26 -2
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.0.dist-info}/METADATA +6 -2
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.0.dist-info}/RECORD +13 -13
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.0.dist-info}/LICENSE +0 -0
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.0.dist-info}/WHEEL +0 -0
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.0.dist-info}/entry_points.txt +0 -0
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.0.dist-info}/top_level.txt +0 -0
nkululeko/autopredict/ap_sid.py
CHANGED
@@ -6,8 +6,11 @@ import numpy as np
|
|
6
6
|
from pyannote.audio import Pipeline
|
7
7
|
import torch
|
8
8
|
|
9
|
+
import audiofile
|
10
|
+
|
9
11
|
from nkululeko.feature_extractor import FeatureExtractor
|
10
12
|
import nkululeko.glob_conf as glob_conf
|
13
|
+
from nkululeko.utils.files import concat_files
|
11
14
|
from nkululeko.utils.util import Util
|
12
15
|
|
13
16
|
|
@@ -20,7 +23,7 @@ class SIDPredictor:
|
|
20
23
|
def __init__(self, df):
|
21
24
|
self.df = df
|
22
25
|
self.util = Util("sidPredictor")
|
23
|
-
hf_token = self.util.config_val("
|
26
|
+
hf_token = self.util.config_val("MODEL", "hf_token", None)
|
24
27
|
if hf_token is None:
|
25
28
|
self.util.error(
|
26
29
|
"speaker id prediction needs huggingface token: [MODEL][hf_token]"
|
@@ -29,20 +32,45 @@ class SIDPredictor:
|
|
29
32
|
"pyannote/speaker-diarization-3.1",
|
30
33
|
use_auth_token=hf_token,
|
31
34
|
)
|
32
|
-
device = self.util.config_val("
|
35
|
+
device = self.util.config_val("MODEL", "device", "cpu")
|
33
36
|
self.pipeline.to(torch.device(device))
|
34
37
|
|
35
38
|
def predict(self, split_selection):
|
36
39
|
self.util.debug(f"estimating speaker id for {split_selection} samples")
|
37
40
|
return_df = self.df.copy()
|
38
|
-
# @todo
|
39
41
|
# 1) concat all audio files
|
42
|
+
tmp_file = "tmp.wav"
|
43
|
+
concat_files(return_df.index, tmp_file)
|
40
44
|
# 2) get segmentations with pyannote
|
41
|
-
|
45
|
+
sname = "pyannotation"
|
46
|
+
if self.util.exist_pickle(sname):
|
47
|
+
annotation = self.util.from_pickle(sname)
|
48
|
+
else:
|
49
|
+
annotation = self.pipeline(tmp_file)
|
50
|
+
self.util.to_pickle(annotation, sname)
|
42
51
|
|
52
|
+
speakers, starts, ends = [], [], []
|
53
|
+
# collect the speaker label and start/end time of each diarization turn
|
54
|
+
for turn, _, speaker in annotation.itertracks(yield_label=True):
|
55
|
+
start = turn.start
|
56
|
+
end = turn.end
|
57
|
+
speakers.append(speaker)
|
58
|
+
starts.append(start)
|
59
|
+
ends.append(end)
|
60
|
+
# 3) map pyannote segments to the original ones and assign speaker id
|
61
|
+
target_speakers = []
|
62
|
+
position = 0
|
63
|
+
for idx, (file, start, end) in enumerate(return_df.index.to_list()):
|
64
|
+
seg_start = start.total_seconds()
|
65
|
+
seg_end = end.total_seconds()
|
66
|
+
# file_duration = audiofile.duration(file)
|
67
|
+
seg_duration = seg_end - seg_start
|
68
|
+
offset = position + seg_start + seg_duration / 2
|
69
|
+
l = [i < offset for i in starts]
|
70
|
+
r = [i for i, x in enumerate(l) if x]
|
71
|
+
s_index = r.pop()
|
72
|
+
# self.util.debug(f"offset: {offset}, speaker = {speakers[s_index]}")
|
73
|
+
position += seg_duration
|
74
|
+
target_speakers.append(speakers[s_index])
|
75
|
+
return_df["speaker"] = target_speakers
|
43
76
|
return return_df
|
44
|
-
|
45
|
-
def concat_files(self, df):
|
46
|
-
pass
|
47
|
-
# todo
|
48
|
-
# please use https://audeering.github.io/audiofile/usage.html#read-a-file
|
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.
|
1
|
+
VERSION="0.93.0"
|
2
2
|
SAMPLING_RATE = 16000
|
nkululeko/data/dataset_csv.py
CHANGED
@@ -53,7 +53,7 @@ class Dataset_CSV(Dataset):
|
|
53
53
|
if audformat.index_type(df.index) == "segmented":
|
54
54
|
file_index = (
|
55
55
|
df.index.levels[0]
|
56
|
-
.map(lambda x: root
|
56
|
+
.map(lambda x: os.path.join(root, audio_path, x))
|
57
57
|
.values
|
58
58
|
)
|
59
59
|
df = df.set_index(df.index.set_levels(file_index, level="file"))
|
@@ -62,20 +62,20 @@ class Dataset_CSV(Dataset):
|
|
62
62
|
df = pd.DataFrame(df)
|
63
63
|
df = df.set_index(
|
64
64
|
df.index.to_series().apply(
|
65
|
-
lambda x: root
|
65
|
+
lambda x: os.path.join(root, audio_path, x)
|
66
66
|
)
|
67
67
|
)
|
68
68
|
else: # absolute path is True
|
69
69
|
if audformat.index_type(df.index) == "segmented":
|
70
70
|
file_index = (
|
71
|
-
df.index.levels[0].map(lambda x: audio_path
|
71
|
+
df.index.levels[0].map(lambda x: os.path.join(audio_path, x)).values
|
72
72
|
)
|
73
73
|
df = df.set_index(df.index.set_levels(file_index, level="file"))
|
74
74
|
else:
|
75
75
|
if not isinstance(df, pd.DataFrame):
|
76
76
|
df = pd.DataFrame(df)
|
77
77
|
df = df.set_index(
|
78
|
-
df.index.to_series().apply(lambda x: audio_path
|
78
|
+
df.index.to_series().apply(lambda x: os.path.join(audio_path, x))
|
79
79
|
)
|
80
80
|
|
81
81
|
self.df = df
|
nkululeko/experiment.py
CHANGED
@@ -422,9 +422,7 @@ class Experiment:
|
|
422
422
|
return df_ret
|
423
423
|
|
424
424
|
def autopredict(self):
|
425
|
-
"""
|
426
|
-
Predict labels for samples with existing models and add to the dataframe.
|
427
|
-
"""
|
425
|
+
"""Predict labels for samples with existing models and add to the dataframe."""
|
428
426
|
sample_selection = self.util.config_val("PREDICT", "split", "all")
|
429
427
|
if sample_selection == "all":
|
430
428
|
df = pd.concat([self.df_train, self.df_test])
|
@@ -1,4 +1,4 @@
|
|
1
|
-
"""
|
1
|
+
"""feats_wav2vec2.py.
|
2
2
|
feat_types example = [wav2vec2-large-robust-ft-swbd-300h,
|
3
3
|
wav2vec2-xls-r-2b, wav2vec2-large, wav2vec2-large-xlsr-53, wav2vec2-base]
|
4
4
|
|
@@ -11,12 +11,13 @@ import os
|
|
11
11
|
import pandas as pd
|
12
12
|
import torch
|
13
13
|
import torchaudio
|
14
|
-
import transformers
|
15
14
|
from tqdm import tqdm
|
16
|
-
|
15
|
+
import transformers
|
16
|
+
from transformers import Wav2Vec2FeatureExtractor
|
17
|
+
from transformers import Wav2Vec2Model
|
17
18
|
|
18
|
-
import nkululeko.glob_conf as glob_conf
|
19
19
|
from nkululeko.feat_extract.featureset import Featureset
|
20
|
+
import nkululeko.glob_conf as glob_conf
|
20
21
|
|
21
22
|
|
22
23
|
class Wav2vec2(Featureset):
|
@@ -8,10 +8,11 @@ import pandas as pd
|
|
8
8
|
import torch
|
9
9
|
import torchaudio
|
10
10
|
from tqdm import tqdm
|
11
|
-
from transformers import Wav2Vec2FeatureExtractor
|
11
|
+
from transformers import Wav2Vec2FeatureExtractor
|
12
|
+
from transformers import WavLMModel
|
12
13
|
|
13
|
-
import nkululeko.glob_conf as glob_conf
|
14
14
|
from nkululeko.feat_extract.featureset import Featureset
|
15
|
+
import nkululeko.glob_conf as glob_conf
|
15
16
|
|
16
17
|
|
17
18
|
class Wavlm(Featureset):
|
nkululeko/utils/files.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
# -*- coding: utf-8 -*-
|
3
|
-
# copied from librosa.util.files.py
|
3
|
+
# find_files copied from librosa.util.files.py
|
4
4
|
|
5
5
|
"""Utility functions for dealing with files"""
|
6
6
|
from __future__ import annotations
|
@@ -8,11 +8,22 @@ from __future__ import annotations
|
|
8
8
|
import glob
|
9
9
|
import os
|
10
10
|
from pathlib import Path
|
11
|
-
from typing import Any
|
11
|
+
from typing import Any
|
12
|
+
from typing import List
|
13
|
+
from typing import Optional
|
14
|
+
from typing import Set
|
15
|
+
from typing import Union
|
16
|
+
|
17
|
+
import numpy as np
|
18
|
+
from tqdm import tqdm
|
19
|
+
|
20
|
+
import audiofile
|
21
|
+
|
12
22
|
|
13
23
|
# add new function here
|
14
24
|
__all__ = [
|
15
25
|
"find_files",
|
26
|
+
"concat_files",
|
16
27
|
]
|
17
28
|
|
18
29
|
|
@@ -143,3 +154,16 @@ def __get_files(dir_name: Union[str, os.PathLike[Any]], extensions: Set[str]):
|
|
143
154
|
myfiles |= set(glob.glob(globstr))
|
144
155
|
|
145
156
|
return myfiles
|
157
|
+
|
158
|
+
|
159
|
+
def concat_files(index, outfile_path):
|
160
|
+
buffer = np.asarray([])
|
161
|
+
sr = 16000
|
162
|
+
for idx, (file, start, end) in enumerate(tqdm(index.to_list())):
|
163
|
+
signal, sr = audiofile.read(
|
164
|
+
file,
|
165
|
+
offset=start.total_seconds(),
|
166
|
+
duration=(end - start).total_seconds(),
|
167
|
+
)
|
168
|
+
buffer = np.concatenate([buffer, signal])
|
169
|
+
audiofile.write(outfile_path, buffer, sr)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.93.0
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -355,6 +355,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
|
|
355
355
|
Changelog
|
356
356
|
=========
|
357
357
|
|
358
|
+
Version 0.93.0
|
359
|
+
--------------
|
360
|
+
* integrated pyannote for speaker prediction for predict module
|
361
|
+
|
358
362
|
Version 0.92.2
|
359
363
|
--------------
|
360
364
|
* added some output to automatic speaker id
|
@@ -365,7 +369,7 @@ Version 0.92.1
|
|
365
369
|
|
366
370
|
Version 0.92.0
|
367
371
|
--------------
|
368
|
-
* added first version of automatic speaker prediction
|
372
|
+
* added first version of automatic speaker prediction for segment module
|
369
373
|
|
370
374
|
Version 0.91.3
|
371
375
|
--------------
|
@@ -2,13 +2,13 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
|
2
2
|
nkululeko/aug_train.py,sha256=FoMbBrfyOZd4QAw7oIHl3X6-UpsqAKWVDIolCA7qOWs,3196
|
3
3
|
nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
|
4
4
|
nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
|
5
|
-
nkululeko/constants.py,sha256=
|
5
|
+
nkululeko/constants.py,sha256=YWEAJOJi8kWTdeRVHLVLQH66lH0czOfFjt-jbpnGmwY,39
|
6
6
|
nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
|
7
7
|
nkululeko/demo.py,sha256=4Yzhg6pCPBYPGJrP7JX2TysVosl_R1llpVDKc2P_gUA,4955
|
8
8
|
nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
|
9
9
|
nkululeko/demo_predictor.py,sha256=lDF-xOxRdEAclOmbepAYg-BQXQdGkHfq2n74PTIoop8,4872
|
10
10
|
nkululeko/ensemble.py,sha256=71V-rre61H3J4sh7lu-OTo4I2_g7mm_rQxwW1ARDHgY,12782
|
11
|
-
nkululeko/experiment.py,sha256=
|
11
|
+
nkululeko/experiment.py,sha256=uU_8WR8JuUD50lgcl_K_BBQYmHMbuwAniWft8bGHuDU,31842
|
12
12
|
nkululeko/explore.py,sha256=Y5lPPychnI-7fyP8zvwVb9P09fvprbUPOofOppuABYQ,3658
|
13
13
|
nkululeko/export.py,sha256=U-V4acxtuL6qKt6oAsVcM5TTeWogYUJ3GU-lA6rq6d4,4336
|
14
14
|
nkululeko/feature_extractor.py,sha256=UnspIWz3XrNhKnBBhWZkH2bHvD-sROtrQVqB1JvkUyw,4088
|
@@ -43,14 +43,14 @@ nkululeko/autopredict/ap_gender.py,sha256=b6oTqHKVwOnYh4YlKbuMflssS4HJqs_c1ayusa
|
|
43
43
|
nkululeko/autopredict/ap_mos.py,sha256=e4hmgb0Yf1_AbC5P0CqXJIvufjhbTrqmI5goARxrY0Y,1107
|
44
44
|
nkululeko/autopredict/ap_pesq.py,sha256=mRt3Loucaoy4vJxwfuxUt0fP88bMGvkmrLCEpKEXWp0,1140
|
45
45
|
nkululeko/autopredict/ap_sdr.py,sha256=VQ2UkxOO3ipqYNNjFwKgEaGCk8IzLI5lX_2tZFLIvTY,1188
|
46
|
-
nkululeko/autopredict/ap_sid.py,sha256=
|
46
|
+
nkululeko/autopredict/ap_sid.py,sha256=mCxf2DUOPUlDdnVwCeljFJtCXM4uum1poZQ9RrwHHM8,2641
|
47
47
|
nkululeko/autopredict/ap_snr.py,sha256=AiTU8-7CMEeowmYkMO19lw1HCb1yTXC6KeulNf8gOqw,1110
|
48
48
|
nkululeko/autopredict/ap_stoi.py,sha256=UEQg1ZV0meAsxgdWB8ieRs9GPXHqArmsaOyCGRwpcnA,1187
|
49
49
|
nkululeko/autopredict/ap_valence.py,sha256=WrW4Ltqi_odW49_4QEVKkfnrcztLIVZ4cXIEHu4dBN8,1026
|
50
50
|
nkululeko/autopredict/estimate_snr.py,sha256=1k9-XadABudnsNOeFZD_Fg0E64-GUQVS7JEp82MLQS4,4995
|
51
51
|
nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
52
52
|
nkululeko/data/dataset.py,sha256=Hz2IOsdcESG-P3aP7r4d1xj_gIP6fyGCYOwukoQ7SM8,29321
|
53
|
-
nkululeko/data/dataset_csv.py,sha256=
|
53
|
+
nkululeko/data/dataset_csv.py,sha256=p2b4eS5R2Q5zdOIc56NRRU2PTFXSRt0qrdHGafHkWKo,4830
|
54
54
|
nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
55
55
|
nkululeko/feat_extract/feats_agender.py,sha256=onfAQ6-xx_mFMJXEF1IX8cHBmGtGeX6weJmxbkfh1_o,3184
|
56
56
|
nkululeko/feat_extract/feats_agender_agender.py,sha256=_YQv1qw--3uQfnyTQDCwlmPRnrhdMhgXbYK2yQtseW0,3464
|
@@ -71,8 +71,8 @@ nkululeko/feat_extract/feats_spectra.py,sha256=6WhFUpB0WTutg7OFMlAw9lSwVU5OBYCDc
|
|
71
71
|
nkululeko/feat_extract/feats_spkrec.py,sha256=o_6bdU4lIkj64S5Kdjf1iyuo1VASeYxE4XdxV94a8gE,4732
|
72
72
|
nkululeko/feat_extract/feats_squim.py,sha256=yJifsp9kj9iJjW_UAKr3LlvVhX5rv7el4bepn0wN2a8,4578
|
73
73
|
nkululeko/feat_extract/feats_trill.py,sha256=TUCrh5xbfnHD2gzb9mlkMSV4aK6YXazMqsh5xJ5yzUI,3188
|
74
|
-
nkululeko/feat_extract/feats_wav2vec2.py,sha256=
|
75
|
-
nkululeko/feat_extract/feats_wavlm.py,sha256=
|
74
|
+
nkululeko/feat_extract/feats_wav2vec2.py,sha256=WYB9XlRzgDi8cGSKzhV5jahA0GZ_SiWgaQ25IcEemto,5296
|
75
|
+
nkululeko/feat_extract/feats_wavlm.py,sha256=O9cfc39VF5aPJRRATKb37pHT4W11i2cu5O1mY9LOjIA,4755
|
76
76
|
nkululeko/feat_extract/feats_whisper.py,sha256=n3ESZtva7wshs8E8diBlQYa9xCH_P0UY1DncSrxz-FY,4508
|
77
77
|
nkululeko/feat_extract/featureset.py,sha256=clcBv9rzBRW-bfw7JC_FYTjU5uUS-c0UE1XtQLYYRiE,1615
|
78
78
|
nkululeko/feat_extract/feinberg_praat.py,sha256=bgzWtQkKbgcygrzwAxDXosui1rcc38qhWuJq9GLr0z8,21308
|
@@ -109,12 +109,12 @@ nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=b3t0zdpJYofKWMyKRMtMMX91xe
|
|
109
109
|
nkululeko/segmenting/seg_pyannote.py,sha256=6IPbgjnGOz9juzEKDTZN3PSipX4t6Mz-DILAx3rp5do,4216
|
110
110
|
nkululeko/segmenting/seg_silero.py,sha256=ulodnvtRq5MLHDxy_RmAK4tJg6h1d-mPq-uCPFkGVKg,4258
|
111
111
|
nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
112
|
-
nkululeko/utils/files.py,sha256=
|
112
|
+
nkululeko/utils/files.py,sha256=SrrYaU7AB80MZHiV1jcB0h_zigvYLYgSVNTXV4ao38g,4593
|
113
113
|
nkululeko/utils/stats.py,sha256=vCRzhCR0Gx5SiJyAGbj1TIto8ocGz58CM5Pr3LltagA,2948
|
114
114
|
nkululeko/utils/util.py,sha256=XFZdhCc_LM4EmoZ5tKKaBCQLXclcNmvHwhfT_CXB98c,16723
|
115
|
-
nkululeko-0.
|
116
|
-
nkululeko-0.
|
117
|
-
nkululeko-0.
|
118
|
-
nkululeko-0.
|
119
|
-
nkululeko-0.
|
120
|
-
nkululeko-0.
|
115
|
+
nkululeko-0.93.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
116
|
+
nkululeko-0.93.0.dist-info/METADATA,sha256=3q74htqBYa_dfgtZzah5SPDwjG3o2c9sfGBlJK9sfI4,41933
|
117
|
+
nkululeko-0.93.0.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
118
|
+
nkululeko-0.93.0.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
|
119
|
+
nkululeko-0.93.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
120
|
+
nkululeko-0.93.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|