nkululeko 0.92.2__py3-none-any.whl → 0.93.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/autopredict/ap_sid.py +37 -9
- nkululeko/constants.py +1 -1
- nkululeko/data/dataset_csv.py +4 -4
- nkululeko/experiment.py +52 -55
- nkululeko/explore.py +3 -5
- nkululeko/feat_extract/feats_wav2vec2.py +5 -4
- nkululeko/feat_extract/feats_wavlm.py +3 -2
- nkululeko/utils/files.py +26 -2
- nkululeko/utils/util.py +5 -1
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/METADATA +26 -18
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/RECORD +15 -15
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/WHEEL +1 -1
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/LICENSE +0 -0
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/entry_points.txt +0 -0
- {nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/top_level.txt +0 -0
nkululeko/autopredict/ap_sid.py
CHANGED
@@ -6,8 +6,11 @@ import numpy as np
 from pyannote.audio import Pipeline
 import torch
 
+import audiofile
+
 from nkululeko.feature_extractor import FeatureExtractor
 import nkululeko.glob_conf as glob_conf
+from nkululeko.utils.files import concat_files
 from nkululeko.utils.util import Util
 
 
@@ -20,7 +23,7 @@ class SIDPredictor:
     def __init__(self, df):
         self.df = df
         self.util = Util("sidPredictor")
-        hf_token = self.util.config_val("
+        hf_token = self.util.config_val("MODEL", "hf_token", None)
         if hf_token is None:
             self.util.error(
                 "speaker id prediction needs huggingface token: [MODEL][hf_token]"
@@ -29,20 +32,45 @@ class SIDPredictor:
             "pyannote/speaker-diarization-3.1",
             use_auth_token=hf_token,
         )
-        device = self.util.config_val("
+        device = self.util.config_val("MODEL", "device", "cpu")
         self.pipeline.to(torch.device(device))
 
     def predict(self, split_selection):
         self.util.debug(f"estimating speaker id for {split_selection} samples")
         return_df = self.df.copy()
-        # @todo
         # 1) concat all audio files
+        tmp_file = "tmp.wav"
+        concat_files(return_df.index, tmp_file)
         # 2) get segmentations with pyannote
-
+        sname = "pyannotation"
+        if self.util.exist_pickle(sname):
+            annotation = self.util.from_pickle(sname)
+        else:
+            annotation = self.pipeline(tmp_file)
+            self.util.to_pickle(annotation, sname)
 
+        speakers, starts, ends = [], [], []
+        # print the result
+        for turn, _, speaker in annotation.itertracks(yield_label=True):
+            start = turn.start
+            end = turn.end
+            speakers.append(speaker)
+            starts.append(start)
+            ends.append(end)
+        # 3) map pyannote segments with orginal ones and assign speaker id
+        target_speakers = []
+        position = 0
+        for idx, (file, start, end) in enumerate(return_df.index.to_list()):
+            seg_start = start.total_seconds()
+            seg_end = end.total_seconds()
+            # file_duration = audiofile.duration(file)
+            seg_duration = seg_end - seg_start
+            offset = position + seg_start + seg_duration / 2
+            l = [i < offset for i in starts]
+            r = [i for i, x in enumerate(l) if x]
+            s_index = r.pop()
+            # self.util.debug(f"offset: {offset}, speaker = {speakers[s_index]}")
+            position += seg_duration
+            target_speakers.append(speakers[s_index])
+        return_df["speaker"] = target_speakers
         return return_df
-
-    def concat_files(self, df):
-        pass
-        # todo
-        # please use https://audeering.github.io/audiofile/usage.html#read-a-file
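Note on the new mapping step (3): each original segment is located by its midpoint on the concatenated timeline and gets the speaker of the last diarization turn that starts before that midpoint. A minimal standalone sketch of the same idea, with invented times as plain floats and independent of the nkululeko classes:

```python
# Sketch of the step-3 mapping above; turn data and segment durations are invented.
turn_starts = [0.0, 4.2, 9.7]    # start times of pyannote turns (seconds)
turn_speakers = ["A", "B", "A"]  # speaker label per turn

seg_durations = [3.0, 2.5, 5.0]  # durations of the original segments

position = 0.0  # running offset of the current segment in the concatenated file
for seg_duration in seg_durations:
    midpoint = position + seg_duration / 2
    # index of the last turn that starts before the segment midpoint
    s_index = max(i for i, s in enumerate(turn_starts) if s < midpoint)
    print(f"segment at {position:.1f}s -> speaker {turn_speakers[s_index]}")
    position += seg_duration
```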
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
-VERSION="0.92.2"
+VERSION="0.93.1"
 SAMPLING_RATE = 16000
nkululeko/data/dataset_csv.py
CHANGED
@@ -53,7 +53,7 @@ class Dataset_CSV(Dataset):
         if audformat.index_type(df.index) == "segmented":
             file_index = (
                 df.index.levels[0]
-                .map(lambda x: root
+                .map(lambda x: os.path.join(root, audio_path, x))
                 .values
             )
             df = df.set_index(df.index.set_levels(file_index, level="file"))
@@ -62,20 +62,20 @@
                 df = pd.DataFrame(df)
             df = df.set_index(
                 df.index.to_series().apply(
-                    lambda x: root
+                    lambda x: os.path.join(root, audio_path, x)
                 )
             )
         else: # absolute path is True
             if audformat.index_type(df.index) == "segmented":
                 file_index = (
-                    df.index.levels[0].map(lambda x: audio_path
+                    df.index.levels[0].map(lambda x: os.path.join(audio_path, x)).values
                 )
                 df = df.set_index(df.index.set_levels(file_index, level="file"))
             else:
                 if not isinstance(df, pd.DataFrame):
                     df = pd.DataFrame(df)
                 df = df.set_index(
-                    df.index.to_series().apply(lambda x: audio_path
+                    df.index.to_series().apply(lambda x: os.path.join(audio_path, x))
                 )
 
         self.df = df
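The old right-hand sides are truncated in this view, but the pattern of the change is clear: path prefixes are now built with `os.path.join` instead of string concatenation. A small illustration of why, with invented values for the variables used in dataset_csv.py:

```python
import os

# Hypothetical values, for illustration only.
root = "data/emodb"
audio_path = "wav"
x = "03a01Fa.wav"

# Manual concatenation needs explicit separators and hard-codes "/";
# os.path.join picks the separator for the platform.
print(root + "/" + audio_path + "/" + x)   # fragile
print(os.path.join(root, audio_path, x))   # portable
```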
nkululeko/experiment.py
CHANGED
@@ -197,6 +197,8 @@ class Experiment:
             )
             self.df_test = self._import_csv(storage_test)
             self.df_train = self._import_csv(storage_train)
+            self.train_empty = True if self.df_train.shape[0] == 0 else False
+            self.test_empty = True if self.df_test.shape[0] == 0 else False
         else:
             self.df_train, self.df_test = pd.DataFrame(), pd.DataFrame()
             for d in self.datasets.values():
@@ -212,6 +214,8 @@
                     self.util.debug(f"warn: {d.name} test empty")
                 self.df_test = pd.concat([self.df_test, d.df_test])
                 self.util.copy_flags(d, self.df_test)
+            self.train_empty = True if self.df_train.shape[0] == 0 else False
+            self.test_empty = True if self.df_test.shape[0] == 0 else False
             store = self.util.get_path("store")
             storage_test = f"{store}testdf.csv"
             storage_train = f"{store}traindf.csv"
@@ -253,50 +257,49 @@
         if self.util.exp_is_classification():
             datatype = self.util.config_val("DATA", "type", "dummy")
             if datatype == "continuous":
-                # if self.df_test.is_labeled:
-                #     # remember the target in case they get labelencoded later
-                #     self.df_test["class_label"] = self.df_test[self.target]
-                test_cats = self.df_test["class_label"].unique()
-                # else:
-                #     # if there is no target, copy a dummy label
-                #     self.df_test = self._add_random_target(self.df_test)
-                # if self.df_train.is_labeled:
-                #     # remember the target in case they get labelencoded later
-                #     self.df_train["class_label"] = self.df_train[self.target]
-                train_cats = self.df_train["class_label"].unique()
-
+                if not self.test_empty:
+                    test_cats = self.df_test["class_label"].unique()
+                if not self.train_empty:
+                    train_cats = self.df_train["class_label"].unique()
             else:
-                if self.df_test.is_labeled:
-                    test_cats = self.df_test[self.target].unique()
-                else:
-                    # if there is no target, copy a dummy label
-                    self.df_test = self._add_random_target(self.df_test).astype(
-                        "str"
-                    )
-                train_cats = self.df_train[self.target].unique()
-
-            if type(test_cats) == np.ndarray:
-                self.util.debug(f"Categories test (nd.array): {test_cats}")
-            else:
-                self.util.debug(f"Categories test (list): {list(test_cats)}")
-            if type(train_cats) == np.ndarray:
-                self.util.debug(f"Categories train (nd.array): {train_cats}")
-            else:
-                self.util.debug(f"Categories train (list): {list(train_cats)}")
-
+                if not self.test_empty:
+                    if self.df_test.is_labeled:
+                        test_cats = self.df_test[self.target].unique()
+                    else:
+                        # if there is no target, copy a dummy label
+                        self.df_test = self._add_random_target(self.df_test).astype(
+                            "str"
+                        )
+                if not self.train_empty:
+                    train_cats = self.df_train[self.target].unique()
             # encode the labels as numbers
             self.label_encoder = LabelEncoder()
-            self.df_train[self.target] = self.label_encoder.fit_transform(
-                self.df_train[self.target]
-            )
-            self.df_test[self.target] = self.label_encoder.transform(
-                self.df_test[self.target]
-            )
             glob_conf.set_label_encoder(self.label_encoder)
+            if not self.train_empty:
+                if type(train_cats) == np.ndarray:
+                    self.util.debug(f"Categories train (nd.array): {train_cats}")
+                else:
+                    self.util.debug(f"Categories train (list): {list(train_cats)}")
+
+                self.df_train[self.target] = self.label_encoder.fit_transform(
+                    self.df_train[self.target]
+                )
+            if not self.test_empty:
+                if self.df_test.is_labeled:
+                    if type(test_cats) == np.ndarray:
+                        self.util.debug(f"Categories test (nd.array): {test_cats}")
+                    else:
+                        self.util.debug(f"Categories test (list): {list(test_cats)}")
+                    if not self.train_empty:
+                        self.df_test[self.target] = self.label_encoder.transform(
+                            self.df_test[self.target]
+                        )
         if self.got_speaker:
+            speakers_train = 0 if self.train_empty else self.df_train.speaker.nunique()
+            speakers_test = 0 if self.test_empty else self.df_test.speaker.nunique()
             self.util.debug(
-                f"{
-                f" {
+                f"{speakers_test} speakers in test and"
+                f" {speakers_train} speakers in train"
             )
 
         target_factor = self.util.config_val("DATA", "target_divide_by", False)
@@ -363,14 +366,16 @@
             self.util.debug("no feature extractor specified.")
             self.feats_train, self.feats_test = pd.DataFrame(), pd.DataFrame()
             return
-        self.feature_extractor = FeatureExtractor(
-            df_train, feats_types, feats_name, "train"
-        )
-        self.feats_train = self.feature_extractor.extract()
-        self.feature_extractor = FeatureExtractor(
-            df_test, feats_types, feats_name, "test"
-        )
-        self.feats_test = self.feature_extractor.extract()
+        if not self.train_empty:
+            self.feature_extractor = FeatureExtractor(
+                df_train, feats_types, feats_name, "train"
+            )
+            self.feats_train = self.feature_extractor.extract()
+        if not self.test_empty:
+            self.feature_extractor = FeatureExtractor(
+                df_test, feats_types, feats_name, "test"
+            )
+            self.feats_test = self.feature_extractor.extract()
         self.util.debug(
             f"All features: train shape : {self.feats_train.shape}, test"
             f" shape:{self.feats_test.shape}"
@@ -393,12 +398,6 @@
             self.util.warn(f"new test labels shape: {self.df_test.shape[0]}")
 
         self._check_scale()
-        # store = self.util.get_path("store")
-        # store_format = self.util.config_val("FEATS", "store_format", "pkl")
-        # storage = f"{store}test_feats.{store_format}"
-        # self.util.write_store(self.feats_test, storage, store_format)
-        # storage = f"{store}train_feats.{store_format}"
-        # self.util.write_store(self.feats_train, storage, store_format)
 
     def augment(self):
         """Augment the selected samples."""
@@ -422,9 +421,7 @@
         return df_ret
 
     def autopredict(self):
-        """
-        Predict labels for samples with existing models and add to the dataframe.
-        """
+        """Predict labels for samples with existing models and add to the dataframe."""
         sample_selection = self.util.config_val("PREDICT", "split", "all")
         if sample_selection == "all":
             df = pd.concat([self.df_train, self.df_test])
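The recurring pattern in this release is the pair of `train_empty`/`test_empty` flags guarding every split-specific step, so that experiments with only a train or only a test set no longer crash. A minimal sketch of the guarded label encoding outside the `Experiment` class, with invented data:

```python
# Sketch of the empty-split guard pattern; df_train/df_test are stand-ins.
import pandas as pd
from sklearn.preprocessing import LabelEncoder

df_train = pd.DataFrame({"emotion": ["happy", "sad", "happy"]})
df_test = pd.DataFrame({"emotion": []})  # e.g. a train-only experiment

train_empty = df_train.shape[0] == 0
test_empty = df_test.shape[0] == 0

label_encoder = LabelEncoder()
if not train_empty:
    df_train["emotion"] = label_encoder.fit_transform(df_train["emotion"])
if not test_empty and not train_empty:
    # transform() without a prior fit() would fail, hence both guards
    df_test["emotion"] = label_encoder.transform(df_test["emotion"])
```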
nkululeko/explore.py
CHANGED
@@ -1,5 +1,4 @@
-"""
-Explore the feature sets of a machine learning experiment.
+"""Explore the feature sets of a machine learning experiment.
 
 This script is the entry point for the 'explore' module of the nkululeko framework.
 It handles loading the experiment configuration, setting up the experiment, and
@@ -77,7 +76,6 @@ def main():
     plot_feats = eval(util.config_val("EXPL", "feature_distributions", "False"))
     tsne = eval(util.config_val("EXPL", "tsne", "False"))
     scatter = eval(util.config_val("EXPL", "scatter", "False"))
-    spotlight = eval(util.config_val("EXPL", "spotlight", "False"))
     shap = eval(util.config_val("EXPL", "shap", "False"))
     model_type = util.config_val("EXPL", "model", False)
     plot_tree = eval(util.config_val("EXPL", "plot_tree", "False"))
@@ -87,8 +85,8 @@
         expr.extract_feats()
         needs_feats = True
     # explore
-    expr.init_runmanager()
-    expr.runmgr.do_runs()
+    # expr.init_runmanager()
+    # expr.runmgr.do_runs()
     expr.analyse_features(needs_feats)
     expr.store_report()
     print("DONE")
nkululeko/feat_extract/feats_wav2vec2.py
CHANGED
@@ -1,4 +1,4 @@
-"""
+"""feats_wav2vec2.py.
 feat_types example = [wav2vec2-large-robust-ft-swbd-300h,
 wav2vec2-xls-r-2b, wav2vec2-large, wav2vec2-large-xlsr-53, wav2vec2-base]
 
@@ -11,12 +11,13 @@ import os
 import pandas as pd
 import torch
 import torchaudio
-import transformers
 from tqdm import tqdm
-
+import transformers
+from transformers import Wav2Vec2FeatureExtractor
+from transformers import Wav2Vec2Model
 
-import nkululeko.glob_conf as glob_conf
 from nkululeko.feat_extract.featureset import Featureset
+import nkululeko.glob_conf as glob_conf
 
 
 class Wav2vec2(Featureset):
nkululeko/feat_extract/feats_wavlm.py
CHANGED
@@ -8,10 +8,11 @@ import pandas as pd
 import torch
 import torchaudio
 from tqdm import tqdm
-from transformers import Wav2Vec2FeatureExtractor, WavLMModel
+from transformers import Wav2Vec2FeatureExtractor
+from transformers import WavLMModel
 
-import nkululeko.glob_conf as glob_conf
 from nkululeko.feat_extract.featureset import Featureset
+import nkululeko.glob_conf as glob_conf
 
 
 class Wavlm(Featureset):
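The import reshuffling above accompanies embedding extraction with pretrained wav2vec 2.0 and WavLM models. For orientation, a hedged sketch of how such utterance embeddings are typically obtained with these transformers classes; the model name and mean pooling are illustrative assumptions, not nkululeko's exact configuration:

```python
# Illustrative only: mean-pooled wav2vec 2.0 embeddings for one dummy signal.
import numpy as np
import torch
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model

model_name = "facebook/wav2vec2-base"  # assumed model, for illustration
processor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
model = Wav2Vec2Model.from_pretrained(model_name)
model.eval()

signal = np.zeros(16000, dtype=np.float32)  # 1 s of silence at 16 kHz
inputs = processor(signal, sampling_rate=16000, return_tensors="pt")
with torch.no_grad():
    hidden = model(inputs.input_values).last_hidden_state  # (1, frames, 768)
embedding = hidden.mean(dim=1).squeeze()  # one vector per utterance
print(embedding.shape)  # torch.Size([768])
```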
nkululeko/utils/files.py
CHANGED
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
-# copied from librosa.util.files.py
+# find_files copied from librosa.util.files.py
 
 """Utility functions for dealing with files"""
 from __future__ import annotations
@@ -8,11 +8,22 @@ from __future__ import annotations
 import glob
 import os
 from pathlib import Path
-from typing import Any, List, Optional, Set, Union
+from typing import Any
+from typing import List
+from typing import Optional
+from typing import Set
+from typing import Union
+
+import numpy as np
+from tqdm import tqdm
+
+import audiofile
+
 
 # add new function here
 __all__ = [
     "find_files",
+    "concat_files",
 ]
 
 
@@ -143,3 +154,16 @@ def __get_files(dir_name: Union[str, os.PathLike[Any]], extensions: Set[str]):
         myfiles |= set(glob.glob(globstr))
 
     return myfiles
+
+
+def concat_files(index, outfile_path):
+    buffer = np.asarray([])
+    sr = 16000
+    for idx, (file, start, end) in enumerate(tqdm(index.to_list())):
+        signal, sr = audiofile.read(
+            file,
+            offset=start.total_seconds(),
+            duration=(end - start).total_seconds(),
+        )
+        buffer = np.concatenate([buffer, signal])
+    audiofile.write(outfile_path, buffer, sr)
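The new `concat_files` helper expects an index that yields `(file, start, end)` tuples with `pandas.Timedelta` start and end values, as in a segmented audformat index, and writes all segments back to back. A hypothetical usage sketch; the file names are invented, and note that the sampling rate of the last file read is used for writing:

```python
# Hypothetical usage of concat_files with a segmented (file, start, end) index.
import pandas as pd
from nkululeko.utils.files import concat_files

index = pd.MultiIndex.from_tuples(
    [
        ("wav/a.wav", pd.Timedelta("0s"), pd.Timedelta("2.5s")),
        ("wav/b.wav", pd.Timedelta("1s"), pd.Timedelta("3s")),
    ],
    names=["file", "start", "end"],
)
concat_files(index, "tmp.wav")  # 2.5 s of a.wav followed by 2 s of b.wav
```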
nkululeko/utils/util.py
CHANGED
@@ -230,7 +230,11 @@ class Util:
 
     def get_model_description(self):
         mt = ""
-        mt = f'{self.config["MODEL"]["type"]}'
+        try:
+            mt = f'{self.config["MODEL"]["type"]}'
+        except KeyError:
+            # no model type given
+            pass
         # ft = "_".join(ast.literal_eval(self.config["FEATS"]["type"]))
         ft_value = self.config["FEATS"]["type"]
         if (
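The change wraps the optional `[MODEL] type` lookup in try/except, since `configparser` raises `KeyError` for a missing section or option. A tiny sketch of the pattern, with invented config text:

```python
# configparser raises KeyError for missing sections/options, so optional
# settings are read inside try/except; the config content here is invented.
import configparser

config = configparser.ConfigParser()
config.read_string("[FEATS]\ntype = ['os']\n")  # no [MODEL] section

mt = ""
try:
    mt = f'{config["MODEL"]["type"]}'
except KeyError:
    pass  # no model type given
print(repr(mt))  # ''
```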
{nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.92.2
+Version: 0.93.1
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -100,22 +100,22 @@ And can show the distribution of specific features per category:
 <img src="meta/images/feat_dist.png" width="500px"/>
 
 ### t-SNE plots
-A t-SNE plot can give you an estimate
+A t-SNE plot can give you an estimate of whether your acoustic features are useful at all:
 
 <img src="meta/images/tsne.png" width="500px"/>
 
 ### Data distribution
-Sometimes you only want to take a look at your data:
+Sometimes, you only want to take a look at your data:
 
 <img src="meta/images/data_plot.png" width="500px"/>
 
 ### Bias checking
-In cases you might wonder if there's bias in your data. You can try to detect this with automatically estimated speech properties
+In some cases, you might wonder if there's bias in your data. You can try to detect this with automatically estimated speech properties by visualizing the correlation of target labels and predicted labels.
 
 <img src="meta/images/emotion-pesq.png" width="500px"/>
 
 ### Uncertainty
-Nkululeko estimates uncertainty of model
+Nkululeko estimates the uncertainty of model decisions (only for classifiers) with entropy over the class probabilities or logits per sample.
 
 <img src="meta/images/uncertainty.png" width="500px"/>
 
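The reworded README passage describes per-sample uncertainty as entropy over the class probabilities; a small sketch of that computation (nkululeko's own implementation may differ, e.g. in normalization or in using logits):

```python
# Entropy as an uncertainty measure over class probabilities (sketch).
import numpy as np

def entropy(probs):
    probs = np.clip(probs, 1e-12, 1.0)  # avoid log(0)
    return -np.sum(probs * np.log2(probs))

print(entropy(np.array([0.98, 0.01, 0.01])))     # confident -> low entropy
print(entropy(np.array([1 / 3, 1 / 3, 1 / 3])))  # uniform -> maximal entropy
```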
@@ -138,7 +138,7 @@ appears, please try
 ```
 pip install x
 ```
-For many packages you will need the missing torch package.
+For many packages, you will need the missing torch package.
 If you don't have a GPU (which is probably true if you don't know what that is), please use
 ```
 pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
@@ -219,7 +219,7 @@ All of them take *--config <my_config.ini>* as an argument.
 * **nkululeko.predict**: [predict features](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/) like SNR, MOS, arousal/valence, age/gender, with DNN models
 * **nkululeko.segment**: [segment a database](http://blog.syntheticspeech.de/2023/07/14/nkululeko-segmenting-a-database/) based on VAD (voice activity detection)
 * **nkululeko.resample**: check on all [sampling rates and change](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/) to 16kHz
-* **nkululeko.nkuluflag**: a convenient module to specify configuration parameters on the command
+* **nkululeko.nkuluflag**: a convenient module to specify configuration parameters on the command line. Usage:
 
 ```bash
 $ python -m nkululeko.nkuluflag.py [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET] [--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
@@ -236,7 +236,7 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
 * [Combine feature sets](http://blog.syntheticspeech.de/2022/06/30/how-to-combine-feature-sets-with-nkululeko/)
 * [Classifying continuous variables](http://blog.syntheticspeech.de/2022/01/26/nkululeko-classifying-continuous-variables/)
 * [Try out / demo a trained model](http://blog.syntheticspeech.de/2022/01/24/nkululeko-try-out-demo-a-trained-model/)
-* [Perform cross
+* [Perform cross-database experiments](http://blog.syntheticspeech.de/2021/10/05/nkululeko-perform-cross-database-experiments/)
 * [Meta parameter optimization](http://blog.syntheticspeech.de/2021/09/03/perform-optimization-with-nkululeko/)
 * [How to set up wav2vec embedding](http://blog.syntheticspeech.de/2021/12/03/how-to-set-up-wav2vec-embedding-for-nkululeko/)
 * [How to soft-label a database](http://blog.syntheticspeech.de/2022/01/24/how-to-soft-label-a-database-with-nkululeko/)
@@ -261,7 +261,7 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
 * [Predict new labels for your data from public models and check bias](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/)
 * [Resample](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/)
 * [Get some statistics on correlation and effect-size](http://blog.syntheticspeech.de/2023/09/05/nkululeko-get-some-statistics-on-correlation-and-effect-size/)
-* [Automatic generation of a latex
+* [Automatic generation of a latex/pdf report](http://blog.syntheticspeech.de/2023/09/26/nkululeko-generate-a-latex-pdf-report/)
 * [Inspect your data with Spotlight](http://blog.syntheticspeech.de/2023/10/31/nkululeko-inspect-your-data-with-spotlight/)
 * [Automatically stratify your split sets](http://blog.syntheticspeech.de/2023/11/07/nkululeko-automatically-stratify-your-split-sets/)
 * [re-name data column names](http://blog.syntheticspeech.de/2023/11/16/nkululeko-re-name-data-column-names/)
@@ -277,7 +277,7 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
 * NEW: [Here's a Google colab that runs this example out-of-the-box](https://colab.research.google.com/drive/1Up7t5Nn7VwDPCCEpTg2U7cpZ_PdoEgj-?usp=sharing), and here is the same [with Kaggle](https://www.kaggle.com/felixburk/nkululeko-hello-world-example)
 * [I made a video to show you how to do this on Windows](https://www.youtube.com/playlist?list=PLRceVavtxLg0y2jiLmpnUfiMtfvkK912D)
 * Set up Python on your computer, version >= 3.8
-* Open a terminal/
+* Open a terminal/command line/console window
 * Test python by typing ```python```, python should start with version >3 (NOT 2!). You can leave the Python Interpreter by typing *exit()*
 * Create a folder on your computer for this example, let's call it `nkulu_work`
 * Get a copy of the [Berlin emodb in audformat](https://zenodo.org/records/7447302/files/emodb.zip?download=1) and unpack inside the folder you just created (`nkulu_work`)
@@ -293,7 +293,7 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
 * if that worked, you should see a ```(venv)``` in front of your prompt
 * Install the required packages in your environment
 * ```pip install nkululeko```
-* Repeat until all error messages
+* Repeat until all error messages vanish (or fix them, or try to ignore them)...
 * Now you should have two folders in your *nkulu_work* folder:
 * *emodb* and *venv*
 * Download a copy of the file [exp_emodb.ini](meta/demos/exp_emodb.ini) to the current working directory (```nkulu_work```)
@@ -301,9 +301,9 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
 * ```python -m nkululeko.nkululeko --config exp_emodb.ini```
 * Find the results in the newly created folder exp_emodb
 * Inspect ```exp_emodb/images/run_0/emodb_xgb_os_0_000_cnf.png```
-* This is the main result of
+* This is the main result of your experiment: a confusion matrix for the emodb emotional categories
 * Inspect and play around with the [demo configuration file](meta/demos/exp_emodb.ini) that defined your experiment, then re-run.
-* There are many ways to experiment with different classifiers and acoustic
+* There are many ways to experiment with different classifiers and acoustic feature sets, [all described here](https://github.com/felixbur/nkululeko/blob/main/ini_file.md)
 
 ### Features
 The framework is targeted at the speech domain and supports experiments where different classifiers are combined with different feature extractors.
@@ -327,16 +327,16 @@ Here's [an animation that shows the progress of classification done with nkulule
 
 
 ## License
-Nkululeko can be used under the [MIT license](https://choosealicense.com/licenses/mit/)
+Nkululeko can be used under the [MIT license](https://choosealicense.com/licenses/mit/).
 
 
 ## Contributing
-Contributions are welcome and encouraged. To learn more about how to contribute to nkululeko please refer to the [Contributing guidelines](./CONTRIBUTING.md)
+Contributions are welcome and encouraged. To learn more about how to contribute to nkululeko, please refer to the [Contributing guidelines](./CONTRIBUTING.md).
 
 ## Citing
-If you use it, please mention the Nkululeko paper
+If you use it, please mention the Nkululeko paper:
 
-F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schuller: Nkululeko: A Tool For Rapid Speaker Characteristics Detection, Proc. Proc. LREC, 2022
+> F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schuller: Nkululeko: A Tool For Rapid Speaker Characteristics Detection, Proc. Proc. LREC, 2022
 
 
 ```
@@ -355,6 +355,14 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
 
+Version 0.93.1
+--------------
+* made explore module more robust
+
+Version 0.93.0
+--------------
+* integrated pyannote for speaker prediction for predict module
+
 Version 0.92.2
 --------------
 * added some output to automatic speaker id
@@ -365,7 +373,7 @@ Version 0.92.1
 
 Version 0.92.0
 --------------
-* added first version of automatic speaker prediction
+* added first version of automatic speaker prediction for segment module
 
 Version 0.91.3
 --------------
{nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/RECORD
CHANGED
@@ -2,14 +2,14 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=FoMbBrfyOZd4QAw7oIHl3X6-UpsqAKWVDIolCA7qOWs,3196
 nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
 nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
-nkululeko/constants.py,sha256
+nkululeko/constants.py,sha256=-K1r-fO1ilOQ-FT2-YDWo37lCfB7mjYPEylDVqqcP_s,39
 nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
 nkululeko/demo.py,sha256=4Yzhg6pCPBYPGJrP7JX2TysVosl_R1llpVDKc2P_gUA,4955
 nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
 nkululeko/demo_predictor.py,sha256=lDF-xOxRdEAclOmbepAYg-BQXQdGkHfq2n74PTIoop8,4872
 nkululeko/ensemble.py,sha256=71V-rre61H3J4sh7lu-OTo4I2_g7mm_rQxwW1ARDHgY,12782
-nkululeko/experiment.py,sha256=
-nkululeko/explore.py,sha256=
+nkululeko/experiment.py,sha256=G5yNFO3z8yGAXJYzZbA-ANAPU9tTtijVyOGG7NGsn2M,31701
+nkululeko/explore.py,sha256=FPM2CS-LKgcDV-LnjYlD6pEv7HuCQpH_C3KyyiOCdk4,3589
 nkululeko/export.py,sha256=U-V4acxtuL6qKt6oAsVcM5TTeWogYUJ3GU-lA6rq6d4,4336
 nkululeko/feature_extractor.py,sha256=UnspIWz3XrNhKnBBhWZkH2bHvD-sROtrQVqB1JvkUyw,4088
 nkululeko/file_checker.py,sha256=xJY0Q6w47pnmgJVK5rcAKPYBrCpV7eBT4_3YBzTx-H8,3454
@@ -43,14 +43,14 @@ nkululeko/autopredict/ap_gender.py,sha256=b6oTqHKVwOnYh4YlKbuMflssS4HJqs_c1ayusa
 nkululeko/autopredict/ap_mos.py,sha256=e4hmgb0Yf1_AbC5P0CqXJIvufjhbTrqmI5goARxrY0Y,1107
 nkululeko/autopredict/ap_pesq.py,sha256=mRt3Loucaoy4vJxwfuxUt0fP88bMGvkmrLCEpKEXWp0,1140
 nkululeko/autopredict/ap_sdr.py,sha256=VQ2UkxOO3ipqYNNjFwKgEaGCk8IzLI5lX_2tZFLIvTY,1188
-nkululeko/autopredict/ap_sid.py,sha256=
+nkululeko/autopredict/ap_sid.py,sha256=mCxf2DUOPUlDdnVwCeljFJtCXM4uum1poZQ9RrwHHM8,2641
 nkululeko/autopredict/ap_snr.py,sha256=AiTU8-7CMEeowmYkMO19lw1HCb1yTXC6KeulNf8gOqw,1110
 nkululeko/autopredict/ap_stoi.py,sha256=UEQg1ZV0meAsxgdWB8ieRs9GPXHqArmsaOyCGRwpcnA,1187
 nkululeko/autopredict/ap_valence.py,sha256=WrW4Ltqi_odW49_4QEVKkfnrcztLIVZ4cXIEHu4dBN8,1026
 nkululeko/autopredict/estimate_snr.py,sha256=1k9-XadABudnsNOeFZD_Fg0E64-GUQVS7JEp82MLQS4,4995
 nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/data/dataset.py,sha256=Hz2IOsdcESG-P3aP7r4d1xj_gIP6fyGCYOwukoQ7SM8,29321
-nkululeko/data/dataset_csv.py,sha256=
+nkululeko/data/dataset_csv.py,sha256=p2b4eS5R2Q5zdOIc56NRRU2PTFXSRt0qrdHGafHkWKo,4830
 nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/feat_extract/feats_agender.py,sha256=onfAQ6-xx_mFMJXEF1IX8cHBmGtGeX6weJmxbkfh1_o,3184
 nkululeko/feat_extract/feats_agender_agender.py,sha256=_YQv1qw--3uQfnyTQDCwlmPRnrhdMhgXbYK2yQtseW0,3464
@@ -71,8 +71,8 @@ nkululeko/feat_extract/feats_spectra.py,sha256=6WhFUpB0WTutg7OFMlAw9lSwVU5OBYCDc
 nkululeko/feat_extract/feats_spkrec.py,sha256=o_6bdU4lIkj64S5Kdjf1iyuo1VASeYxE4XdxV94a8gE,4732
 nkululeko/feat_extract/feats_squim.py,sha256=yJifsp9kj9iJjW_UAKr3LlvVhX5rv7el4bepn0wN2a8,4578
 nkululeko/feat_extract/feats_trill.py,sha256=TUCrh5xbfnHD2gzb9mlkMSV4aK6YXazMqsh5xJ5yzUI,3188
-nkululeko/feat_extract/feats_wav2vec2.py,sha256=
-nkululeko/feat_extract/feats_wavlm.py,sha256=
+nkululeko/feat_extract/feats_wav2vec2.py,sha256=WYB9XlRzgDi8cGSKzhV5jahA0GZ_SiWgaQ25IcEemto,5296
+nkululeko/feat_extract/feats_wavlm.py,sha256=O9cfc39VF5aPJRRATKb37pHT4W11i2cu5O1mY9LOjIA,4755
 nkululeko/feat_extract/feats_whisper.py,sha256=n3ESZtva7wshs8E8diBlQYa9xCH_P0UY1DncSrxz-FY,4508
 nkululeko/feat_extract/featureset.py,sha256=clcBv9rzBRW-bfw7JC_FYTjU5uUS-c0UE1XtQLYYRiE,1615
 nkululeko/feat_extract/feinberg_praat.py,sha256=bgzWtQkKbgcygrzwAxDXosui1rcc38qhWuJq9GLr0z8,21308
@@ -109,12 +109,12 @@ nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=b3t0zdpJYofKWMyKRMtMMX91xe
 nkululeko/segmenting/seg_pyannote.py,sha256=6IPbgjnGOz9juzEKDTZN3PSipX4t6Mz-DILAx3rp5do,4216
 nkululeko/segmenting/seg_silero.py,sha256=ulodnvtRq5MLHDxy_RmAK4tJg6h1d-mPq-uCPFkGVKg,4258
 nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nkululeko/utils/files.py,sha256=
+nkululeko/utils/files.py,sha256=SrrYaU7AB80MZHiV1jcB0h_zigvYLYgSVNTXV4ao38g,4593
 nkululeko/utils/stats.py,sha256=vCRzhCR0Gx5SiJyAGbj1TIto8ocGz58CM5Pr3LltagA,2948
-nkululeko/utils/util.py,sha256=
-nkululeko-0.
-nkululeko-0.
-nkululeko-0.
-nkululeko-0.
-nkululeko-0.
-nkululeko-0.
+nkululeko/utils/util.py,sha256=yxETonpbcGTeJhvdDr7sC4CO0Qtf-pgHEclZ76eOtPA,16816
+nkululeko-0.93.1.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.93.1.dist-info/METADATA,sha256=eLAEogu2sQxBrmzlxfHKs8AP2d9uELQLGhmIJ5sBWgk,42018
+nkululeko-0.93.1.dist-info/WHEEL,sha256=a7TGlA-5DaHMRrarXjVbQagU3Man_dCnGIWMJr5kRWo,91
+nkululeko-0.93.1.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
+nkululeko-0.93.1.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.93.1.dist-info/RECORD,,
{nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/LICENSE
File without changes
{nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/entry_points.txt
File without changes
{nkululeko-0.92.2.dist-info → nkululeko-0.93.1.dist-info}/top_level.txt
File without changes