nkululeko 0.60.0__py3-none-any.whl → 0.62.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +1 -1
- nkululeko/feature_extractor.py +90 -47
- nkululeko/plots.py +11 -0
- nkululeko/resample.py +63 -0
- nkululeko/segment.py +29 -38
- nkululeko/util.py +31 -28
- {nkululeko-0.60.0.dist-info → nkululeko-0.62.0.dist-info}/METADATA +12 -1
- {nkululeko-0.60.0.dist-info → nkululeko-0.62.0.dist-info}/RECORD +11 -10
- {nkululeko-0.60.0.dist-info → nkululeko-0.62.0.dist-info}/LICENSE +0 -0
- {nkululeko-0.60.0.dist-info → nkululeko-0.62.0.dist-info}/WHEEL +0 -0
- {nkululeko-0.60.0.dist-info → nkululeko-0.62.0.dist-info}/top_level.txt +0 -0
nkululeko/constants.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '0.60.0'
|
1
|
+
VERSION = '0.62.0'
|
nkululeko/feature_extractor.py
CHANGED
@@ -5,8 +5,9 @@ Helper class to encapsulate feature extraction methods
|
|
5
5
|
|
6
6
|
"""
|
7
7
|
import pandas as pd
|
8
|
-
|
8
|
+
|
9
9
|
from nkululeko.feat_extract.feats_opensmile import Opensmileset
|
10
|
+
from nkululeko.util import Util
|
10
11
|
|
11
12
|
|
12
13
|
class FeatureExtractor:
|
@@ -16,80 +17,122 @@ class FeatureExtractor:
|
|
16
17
|
data_df (pandas.DataFrame): dataframe with audiofile paths as index
|
17
18
|
feats_types (array of strings): designations of acoustic feature extractors to be used
|
18
19
|
data_name (string): names of databases that are extracted (for the caching)
|
19
|
-
feats_designation (string): the type of split (train/test), also is used for the cache name.
|
20
|
+
feats_designation (string): the type of split (train/test), also is used for the cache name.
|
20
21
|
Returns:
|
21
|
-
df (pandas.DataFrame): dataframe with same index as data_df and acoustic features in columns
|
22
|
+
df (pandas.DataFrame): dataframe with same index as data_df and acoustic features in columns
|
22
23
|
"""
|
23
|
-
df = None # pandas dataframe to store the features (and indexed with the data from the sets)
|
24
|
-
data_df = None # dataframe to get audio paths
|
25
24
|
|
26
|
-
#
|
25
|
+
# pandas dataframe to store the features (and indexed with the data from the sets)
|
26
|
+
df = None
|
27
|
+
data_df = None # dataframe to get audio paths
|
28
|
+
|
29
|
+
# def __init__
|
27
30
|
def __init__(self, data_df, feats_types, data_name, feats_designation):
|
28
31
|
self.data_df = data_df
|
29
32
|
self.data_name = data_name
|
30
33
|
self.feats_types = feats_types
|
31
|
-
self.util = Util(
|
34
|
+
self.util = Util("feature_extractor")
|
32
35
|
self.feats_designation = feats_designation
|
33
|
-
|
36
|
+
|
34
37
|
def extract(self):
|
35
38
|
# feats_types = self.util.config_val_list('FEATS', 'type', ['os'])
|
36
39
|
self.featExtractor = None
|
37
|
-
self.feats= pd.DataFrame()
|
40
|
+
self.feats = pd.DataFrame()
|
38
41
|
_scale = True
|
39
42
|
for feats_type in self.feats_types:
|
40
|
-
store_name = f
|
41
|
-
if feats_type==
|
42
|
-
self.featExtractor = Opensmileset(
|
43
|
-
|
43
|
+
store_name = f"{self.data_name}_{feats_type}"
|
44
|
+
if feats_type == "os":
|
45
|
+
self.featExtractor = Opensmileset(
|
46
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
47
|
+
)
|
48
|
+
elif feats_type == "trill":
|
44
49
|
from nkululeko.feat_extract.feats_trill import TRILLset
|
45
|
-
self.featExtractor = TRILLset(
|
46
|
-
|
50
|
+
self.featExtractor = TRILLset(
|
51
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
52
|
+
)
|
53
|
+
elif feats_type.startswith("wav2vec2"):
|
47
54
|
from nkululeko.feat_extract.feats_wav2vec2 import Wav2vec2
|
48
|
-
self.featExtractor = Wav2vec2(
|
49
|
-
|
55
|
+
self.featExtractor = Wav2vec2(
|
56
|
+
f"{store_name}_{self.feats_designation}", self.data_df,
|
57
|
+
feats_type
|
58
|
+
)
|
59
|
+
elif feats_type.startswith("hubert"):
|
60
|
+
from nkululeko.feat_extract.feats_hubert import Hubert
|
61
|
+
self.featExtractor = Hubert(
|
62
|
+
f"{store_name}_{self.feats_designation}", self.data_df,
|
63
|
+
feats_type
|
64
|
+
)
|
65
|
+
|
66
|
+
elif feats_type == "audmodel":
|
50
67
|
from nkululeko.feat_extract.feats_audmodel import AudModelSet
|
51
|
-
self.featExtractor = AudModelSet(
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
self.featExtractor =
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
68
|
+
self.featExtractor = AudModelSet(
|
69
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
70
|
+
)
|
71
|
+
elif feats_type == "auddim":
|
72
|
+
from nkululeko.feat_extract.feats_audmodel_dim import \
|
73
|
+
AudModelDimSet
|
74
|
+
self.featExtractor = AudModelDimSet(
|
75
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
76
|
+
)
|
77
|
+
elif feats_type == "agender":
|
78
|
+
from nkululeko.feat_extract.feats_agender import \
|
79
|
+
AudModelAgenderSet
|
80
|
+
self.featExtractor = AudModelAgenderSet(
|
81
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
82
|
+
)
|
83
|
+
elif feats_type == "agender_agender":
|
84
|
+
from nkululeko.feat_extract.feats_agender_agender import \
|
85
|
+
AgenderAgenderSet
|
86
|
+
self.featExtractor = AgenderAgenderSet(
|
87
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
88
|
+
)
|
89
|
+
elif feats_type == "snr":
|
62
90
|
from nkululeko.feat_extract.feats_snr import SNRSet
|
63
|
-
self.featExtractor = SNRSet(
|
64
|
-
|
91
|
+
self.featExtractor = SNRSet(
|
92
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
93
|
+
)
|
94
|
+
elif feats_type == "mos":
|
65
95
|
from nkululeko.feat_extract.feats_mos import MOSSet
|
66
|
-
self.featExtractor = MOSSet(
|
67
|
-
|
96
|
+
self.featExtractor = MOSSet(
|
97
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
98
|
+
)
|
99
|
+
elif feats_type == "squim":
|
68
100
|
from nkululeko.feat_extract.feats_squim import SQUIMSet
|
69
|
-
self.featExtractor = SQUIMSet(
|
70
|
-
|
101
|
+
self.featExtractor = SQUIMSet(
|
102
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
103
|
+
)
|
104
|
+
elif feats_type == "clap":
|
71
105
|
from nkululeko.feat_extract.feats_clap import Clap
|
72
|
-
self.featExtractor = Clap(
|
73
|
-
|
106
|
+
self.featExtractor = Clap(
|
107
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
108
|
+
)
|
109
|
+
elif feats_type == "praat":
|
74
110
|
from nkululeko.feat_extract.feats_praat import Praatset
|
75
|
-
self.featExtractor = Praatset(
|
76
|
-
|
111
|
+
self.featExtractor = Praatset(
|
112
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
113
|
+
)
|
114
|
+
elif feats_type == "mld":
|
77
115
|
from nkululeko.feat_extract.feats_mld import MLD_set
|
78
|
-
self.featExtractor = MLD_set(
|
79
|
-
|
116
|
+
self.featExtractor = MLD_set(
|
117
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
118
|
+
)
|
119
|
+
elif feats_type == "import":
|
80
120
|
from nkululeko.feat_extract.feats_import import Importset
|
81
|
-
self.featExtractor = Importset(
|
121
|
+
self.featExtractor = Importset(
|
122
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
123
|
+
)
|
82
124
|
else:
|
83
|
-
self.util.error(f
|
125
|
+
self.util.error(f"unknown feats_type: {feats_type}")
|
84
126
|
|
85
127
|
self.featExtractor.extract()
|
86
128
|
self.featExtractor.filter()
|
87
129
|
# remove samples that were not extracted by MLD
|
88
|
-
#self.df_test = self.df_test.loc[self.df_test.index.intersection(featExtractor_test.df.index)]
|
89
|
-
#self.df_train = self.df_train.loc[self.df_train.index.intersection(featExtractor_train.df.index)]
|
90
|
-
self.util.debug(
|
91
|
-
|
130
|
+
# self.df_test = self.df_test.loc[self.df_test.index.intersection(featExtractor_test.df.index)]
|
131
|
+
# self.df_train = self.df_train.loc[self.df_train.index.intersection(featExtractor_train.df.index)]
|
132
|
+
self.util.debug(
|
133
|
+
f"{feats_type}: shape : {self.featExtractor.df.shape}")
|
134
|
+
self.feats = pd.concat([self.feats, self.featExtractor.df], axis=1)
|
92
135
|
return self.feats
|
93
136
|
|
94
137
|
def extract_sample(self, signal, sr):
|
95
|
-
return self.featExtractor.extract_sample(signal, sr)
|
138
|
+
return self.featExtractor.extract_sample(signal, sr)
|
nkululeko/plots.py
CHANGED
@@ -95,6 +95,17 @@ class Plots():
|
|
95
95
|
else:
|
96
96
|
self.util.error(f'plot value counts: the plot distribution descriptor for {att} has more than 2 values')
|
97
97
|
|
98
|
+
def plot_durations(self, df, filename, sample_selection):
|
99
|
+
fig_dir = self.util.get_path('fig_dir')+'../' # one up because of the runs
|
100
|
+
ax = sns.histplot(df, x='duration', hue='class_label', kde=True)
|
101
|
+
ax.set_title(f'{sample_selection} {df.shape[0]}')
|
102
|
+
ax.set_xlabel(f'duration')
|
103
|
+
ax.set_ylabel(f'number of samples')
|
104
|
+
fig = ax.figure
|
105
|
+
plt.tight_layout()
|
106
|
+
plt.savefig(f'{fig_dir}{filename}_{sample_selection}.{self.format}')
|
107
|
+
plt.close(fig)
|
108
|
+
fig.clear()
|
98
109
|
|
99
110
|
def describe_df(self, name, df, target, filename):
|
100
111
|
"""Make a stacked barplot of samples and speakers per sex and target values. speaker, gender and target columns must be present"""
|
nkululeko/resample.py
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
# resample.py
|
2
|
+
# change the sampling rate for train and test splits
|
3
|
+
|
4
|
+
from nkululeko.experiment import Experiment
|
5
|
+
import configparser
|
6
|
+
from nkululeko.util import Util
|
7
|
+
from nkululeko.constants import VERSION
|
8
|
+
import argparse
|
9
|
+
import os
|
10
|
+
import pandas as pd
|
11
|
+
from nkululeko.augmenting .resampler import Resampler
|
12
|
+
|
13
|
+
def main(src_dir):
|
14
|
+
parser = argparse.ArgumentParser(description='Call the nkululeko framework.')
|
15
|
+
parser.add_argument('--config', default='exp.ini', help='The base configuration')
|
16
|
+
args = parser.parse_args()
|
17
|
+
if args.config is not None:
|
18
|
+
config_file = args.config
|
19
|
+
else:
|
20
|
+
config_file = f'{src_dir}/exp.ini'
|
21
|
+
|
22
|
+
# test if the configuration file exists
|
23
|
+
if not os.path.isfile(config_file):
|
24
|
+
print(f'ERROR: no such file: {config_file}')
|
25
|
+
exit()
|
26
|
+
|
27
|
+
# load one configuration per experiment
|
28
|
+
config = configparser.ConfigParser()
|
29
|
+
config.read(config_file)
|
30
|
+
# create a new experiment
|
31
|
+
expr = Experiment(config)
|
32
|
+
util = Util('resample')
|
33
|
+
util.debug(f'running {expr.name} from config {config_file}, nkululeko version {VERSION}')
|
34
|
+
|
35
|
+
if util.config_val('EXP', 'no_warnings', False):
|
36
|
+
import warnings
|
37
|
+
warnings.filterwarnings("ignore")
|
38
|
+
|
39
|
+
# load the data
|
40
|
+
expr.load_datasets()
|
41
|
+
|
42
|
+
# split into train and test
|
43
|
+
expr.fill_train_and_tests()
|
44
|
+
util.debug(f'train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}')
|
45
|
+
|
46
|
+
sample_selection = util.config_val('RESAMPLE', 'sample_selection', 'all')
|
47
|
+
if sample_selection=='all':
|
48
|
+
df = pd.concat([expr.df_train, expr.df_test])
|
49
|
+
elif sample_selection=='train':
|
50
|
+
df = expr.df_train
|
51
|
+
elif sample_selection=='test':
|
52
|
+
df = expr.df_test
|
53
|
+
else:
|
54
|
+
util.error(f'unknown selection specifier {sample_selection}, should be [all | train | test]')
|
55
|
+
util.debug(f'resampling {sample_selection}: {df.shape[0]} samples')
|
56
|
+
rs = Resampler(df)
|
57
|
+
rs.resample()
|
58
|
+
print('DONE')
|
59
|
+
|
60
|
+
|
61
|
+
if __name__ == "__main__":
|
62
|
+
cwd = os.path.dirname(os.path.abspath(__file__))
|
63
|
+
main(cwd) # use this if you want to state the config file path on command line
|
nkululeko/segment.py
CHANGED
@@ -7,25 +7,8 @@ from nkululeko.util import Util
|
|
7
7
|
from nkululeko.constants import VERSION
|
8
8
|
import argparse
|
9
9
|
import os
|
10
|
-
import torch
|
11
|
-
import audformat
|
12
|
-
from audformat.utils import to_filewise_index
|
13
|
-
from audformat import segmented_index
|
14
10
|
import pandas as pd
|
15
11
|
|
16
|
-
# initialize the VAD model
|
17
|
-
SAMPLING_RATE = 16000
|
18
|
-
torch.set_num_threads(1)
|
19
|
-
vad_model, vad_utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
|
20
|
-
model='silero_vad',
|
21
|
-
force_reload=False,
|
22
|
-
onnx=False)
|
23
|
-
(get_speech_timestamps,
|
24
|
-
save_audio,
|
25
|
-
read_audio,
|
26
|
-
VADIterator,
|
27
|
-
collect_chunks) = vad_utils
|
28
|
-
|
29
12
|
def main(src_dir):
|
30
13
|
parser = argparse.ArgumentParser(description='Call the nkululeko framework.')
|
31
14
|
parser.add_argument('--config', default='exp.ini', help='The base configuration')
|
@@ -60,19 +43,10 @@ def main(src_dir):
|
|
60
43
|
util.debug(f'train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}')
|
61
44
|
|
62
45
|
# segment
|
63
|
-
segment_target = util.config_val('
|
64
|
-
# this if a specific dataset is to be segmented
|
65
|
-
# segment_db = util.config_val('DATA', 'segment', False)
|
66
|
-
# if segment_db:
|
67
|
-
# for dataset in expr.datasets.keys:
|
68
|
-
# if segment_db == dataset:
|
69
|
-
# df = expr.datasets[dataset].df
|
70
|
-
# util.debug(f'segmenting {dataset}')
|
71
|
-
# df_seg = segment_dataframe(df)
|
72
|
-
# name = f'{dataset}{segment_target}'
|
73
|
-
# df_seg.to_csv(f'{expr.data_dir}/{name}.csv')
|
46
|
+
segment_target = util.config_val('SEGMENT', 'target', '_seg')
|
74
47
|
|
75
|
-
|
48
|
+
segmenter = util.config_val('SEGMENT', 'method', 'silero')
|
49
|
+
sample_selection = util.config_val('SEGMENT', 'sample_selection', 'all')
|
76
50
|
if sample_selection=='all':
|
77
51
|
df = pd.concat([expr.df_train, expr.df_test])
|
78
52
|
elif sample_selection=='train':
|
@@ -81,26 +55,43 @@ def main(src_dir):
|
|
81
55
|
df = expr.df_test
|
82
56
|
else:
|
83
57
|
util.error(f'unknown segmentation selection specifier {sample_selection}, should be [all | train | test]')
|
58
|
+
util.debug(f'segmenting {sample_selection}: {df.shape[0]} samples with {segmenter}')
|
59
|
+
if segmenter=='silero':
|
60
|
+
from nkululeko.segmenting.seg_silero import Silero_segmenter
|
61
|
+
segmenter = Silero_segmenter()
|
62
|
+
df_seg = segmenter.segment_dataframe(df)
|
63
|
+
|
64
|
+
else:
|
65
|
+
util.error(f'unkown segmenter: {segmenter}')
|
84
66
|
|
85
|
-
if "duration" not in df.columns:
|
86
|
-
df = df.drop(columns=['duration'], inplace=True)
|
87
|
-
util.debug(f'segmenting train and test set: {df.shape[0]} samples')
|
88
|
-
df_seg = segment_dataframe(df)
|
89
67
|
def calc_dur(x):
|
68
|
+
from datetime import datetime
|
90
69
|
starts = x[1]
|
91
70
|
ends = x[2]
|
92
71
|
return (ends - starts).total_seconds()
|
93
72
|
df_seg['duration'] = df_seg.index.to_series().map(lambda x:calc_dur(x))
|
73
|
+
if "duration" not in df.columns:
|
74
|
+
df['duration'] = df.index.to_series().map(lambda x:calc_dur(x))
|
75
|
+
num_before = df.shape[0]
|
76
|
+
num_after = df_seg.shape[0]
|
77
|
+
# plot distributions
|
78
|
+
from nkululeko.plots import Plots
|
79
|
+
plots = Plots()
|
80
|
+
plots.plot_durations(df, 'original_durations', sample_selection)
|
81
|
+
plots.plot_durations(df_seg, 'segmented_durations', sample_selection)
|
82
|
+
print('')
|
83
|
+
# remove encoded labels
|
84
|
+
target = util.config_val('DATA', 'target', 'emotion')
|
85
|
+
if 'class_label' in df_seg.columns:
|
86
|
+
df_seg = df_seg.drop(columns=[target])
|
87
|
+
df_seg = df_seg.rename(columns={'class_label':target})
|
88
|
+
# save file
|
94
89
|
dataname = '_'.join(expr.datasets.keys())
|
95
90
|
name = f'{dataname}{segment_target}'
|
96
91
|
df_seg.to_csv(f'{expr.data_dir}/{name}.csv')
|
97
|
-
|
98
|
-
util.debug(f'saved {name}.csv to {expr.data_dir}, {df_seg.shape[0]} samples')
|
92
|
+
util.debug(f'saved {name}.csv to {expr.data_dir}, {num_after} samples (was {num_before})')
|
99
93
|
print('DONE')
|
100
94
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
95
|
def get_segmentation(file):
|
105
96
|
# print(f'segmenting {file[0]}')
|
106
97
|
print('.', end='')
|
nkululeko/util.py
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
import audeer
|
3
3
|
import ast
|
4
4
|
import sys
|
5
|
-
import nkululeko.glob_conf as glob_conf
|
6
5
|
import numpy as np
|
7
6
|
import os.path
|
8
7
|
import configparser
|
@@ -11,16 +10,20 @@ import pandas as pd
|
|
11
10
|
|
12
11
|
class Util:
|
13
12
|
|
13
|
+
# a list of words that need not to be warned upon if default values are used
|
14
14
|
stopvals = [False, 'False', 'classification', 'png']
|
15
15
|
|
16
|
-
def __init__(self, caller=None):
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
16
|
+
def __init__(self, caller=None, has_config=True):
|
17
|
+
if has_config:
|
18
|
+
import nkululeko.glob_conf as glob_conf
|
19
|
+
self.config = glob_conf.config
|
20
|
+
self.got_data_roots = self.config_val('DATA', 'root_folders', False)
|
21
|
+
if self.got_data_roots:
|
22
|
+
# if there is a global data rootfolder file, read from there
|
23
|
+
if not os.path.isfile(self.got_data_roots):
|
24
|
+
self.error(f'no such file: {self.got_data_roots}')
|
25
|
+
self.data_roots = configparser.ConfigParser()
|
26
|
+
self.data_roots.read(self.got_data_roots)
|
24
27
|
if caller is not None:
|
25
28
|
self.caller = caller
|
26
29
|
else:
|
@@ -30,10 +33,10 @@ class Util:
|
|
30
33
|
"""
|
31
34
|
This method allows the user to get the directory path for the given argument.
|
32
35
|
"""
|
33
|
-
root =
|
34
|
-
name =
|
36
|
+
root = self.config['EXP']['root']
|
37
|
+
name = self.config['EXP']['name']
|
35
38
|
try:
|
36
|
-
entryn =
|
39
|
+
entryn = self.config['EXP'][entry]
|
37
40
|
except KeyError:
|
38
41
|
# some default values
|
39
42
|
if entry == 'fig_dir':
|
@@ -61,7 +64,7 @@ class Util:
|
|
61
64
|
If the value is present in the experiment configuration it will be used, else
|
62
65
|
we look in a global file specified by the root_folders value.
|
63
66
|
"""
|
64
|
-
configuration =
|
67
|
+
configuration = self.config
|
65
68
|
try:
|
66
69
|
if len(key)>0:
|
67
70
|
return configuration['DATA'][dataset+'.'+key]
|
@@ -95,8 +98,8 @@ class Util:
|
|
95
98
|
"""
|
96
99
|
Get the experiment directory
|
97
100
|
"""
|
98
|
-
root =
|
99
|
-
name =
|
101
|
+
root = self.config['EXP']['root']
|
102
|
+
name = self.config['EXP']['name']
|
100
103
|
dir_name = f'{root}{name}'
|
101
104
|
audeer.mkdir(dir_name)
|
102
105
|
return dir_name
|
@@ -124,19 +127,19 @@ class Util:
|
|
124
127
|
"""
|
125
128
|
Get a string as name from all databases that are useed
|
126
129
|
"""
|
127
|
-
return '_'.join(ast.literal_eval(
|
130
|
+
return '_'.join(ast.literal_eval(self.config['DATA']['databases']))
|
128
131
|
|
129
132
|
def get_exp_name(self, only_train = False, only_data = False):
|
130
133
|
trains_val = self.config_val('DATA', 'trains', False)
|
131
134
|
if only_train and trains_val:
|
132
135
|
# try to get only the train tables
|
133
|
-
ds = '_'.join(ast.literal_eval(
|
136
|
+
ds = '_'.join(ast.literal_eval(self.config['DATA']['trains']))
|
134
137
|
else:
|
135
|
-
ds = '_'.join(ast.literal_eval(
|
138
|
+
ds = '_'.join(ast.literal_eval(self.config['DATA']['databases']))
|
136
139
|
mt = ''
|
137
140
|
if not only_data:
|
138
|
-
mt = f'_{
|
139
|
-
ft = '_'.join(ast.literal_eval(
|
141
|
+
mt = f'_{self.config["MODEL"]["type"]}'
|
142
|
+
ft = '_'.join(ast.literal_eval(self.config['FEATS']['type']))
|
140
143
|
ft += '_'
|
141
144
|
set = self.config_val('FEATS', 'set', False)
|
142
145
|
set_string = ''
|
@@ -159,7 +162,7 @@ class Util:
|
|
159
162
|
|
160
163
|
def get_plot_name(self):
|
161
164
|
try:
|
162
|
-
plot_name =
|
165
|
+
plot_name = self.config['PLOT']['name']
|
163
166
|
except KeyError:
|
164
167
|
plot_name = self.get_exp_name()
|
165
168
|
return plot_name
|
@@ -183,10 +186,10 @@ class Util:
|
|
183
186
|
def set_config_val(self, section, key, value):
|
184
187
|
try:
|
185
188
|
# does the section already exists?
|
186
|
-
|
189
|
+
self.config[section][key] = str(value)
|
187
190
|
except KeyError:
|
188
|
-
|
189
|
-
|
191
|
+
self.config.add_section(section)
|
192
|
+
self.config[section][key] = str(value)
|
190
193
|
|
191
194
|
def check_df(self, i, df):
|
192
195
|
"""Check a dataframe"""
|
@@ -195,7 +198,7 @@ class Util:
|
|
195
198
|
)
|
196
199
|
def config_val(self, section, key, default):
|
197
200
|
try:
|
198
|
-
return
|
201
|
+
return self.config[section][key]
|
199
202
|
except KeyError:
|
200
203
|
if not default in self.stopvals:
|
201
204
|
self.debug(f'value for {key} not found, using default: {default}')
|
@@ -203,7 +206,7 @@ class Util:
|
|
203
206
|
|
204
207
|
def config_val_list(self, section, key, default):
|
205
208
|
try:
|
206
|
-
return ast.literal_eval(
|
209
|
+
return ast.literal_eval(self.config[section][key])
|
207
210
|
except KeyError:
|
208
211
|
if not default in self.stopvals:
|
209
212
|
self.debug(f'value for {key} not found, using default: {default}')
|
@@ -213,11 +216,11 @@ class Util:
|
|
213
216
|
# try:
|
214
217
|
# labels = glob_conf.label_encoder.classes_
|
215
218
|
# except AttributeError:
|
216
|
-
labels = ast.literal_eval(
|
219
|
+
labels = ast.literal_eval(self.config['DATA']['labels'])
|
217
220
|
return labels
|
218
221
|
|
219
222
|
def continuous_to_categorical(self, array):
|
220
|
-
bins = ast.literal_eval(
|
223
|
+
bins = ast.literal_eval(self.config['DATA']['bins'])
|
221
224
|
result = np.digitize(array, bins)-1
|
222
225
|
return result
|
223
226
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.60.0
|
3
|
+
Version: 0.62.0
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -253,6 +253,17 @@ Nkululeko can be used under the [MIT license](https://choosealicense.com/license
|
|
253
253
|
Changelog
|
254
254
|
=========
|
255
255
|
|
256
|
+
Version 0.62.0
|
257
|
+
--------------
|
258
|
+
* fixed segment silero bug
|
259
|
+
* added all Wav2vec2 models
|
260
|
+
* added resampler module
|
261
|
+
* added error on file for embeddings
|
262
|
+
|
263
|
+
Version 0.61.0
|
264
|
+
--------------
|
265
|
+
* added HUBERT embeddings
|
266
|
+
|
256
267
|
Version 0.60.0
|
257
268
|
--------------
|
258
269
|
* some bugfixes
|
@@ -2,30 +2,31 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
|
2
2
|
nkululeko/augment.py,sha256=ebv5QebGD8wLzXInvusjn4kFlET6-yXkYoF132BrubQ,1750
|
3
3
|
nkululeko/balancer.py,sha256=64ftZN68sMDfkvuovCDHpAHmSJgCO6Kdk9bwmpSisec,12
|
4
4
|
nkululeko/cacheddataset.py,sha256=bSJ_SDg7TxL89YL_pJXp-sFvdUXJtHuBTd5KSTE4AkQ,955
|
5
|
-
nkululeko/constants.py,sha256=
|
5
|
+
nkululeko/constants.py,sha256=7u8AMX9auXDDDSpQnfb8--c2xIvy8IAsVaxENC-wi0k,18
|
6
6
|
nkululeko/demo.py,sha256=nGP3fUDXuW1ZF12AzMpzRWXct0rdqYRJVNgA9B_QWwA,1821
|
7
7
|
nkululeko/demo_predictor.py,sha256=VVxE2lf5lTkAP5qElG5U2bK6SdDzQ2Jmf0Vn_yHpSro,2302
|
8
8
|
nkululeko/experiment.py,sha256=9qStgy31svY4bBVZOkuJ0JFjEQ1sIT2ibIdJ6IVlfTI,25063
|
9
9
|
nkululeko/explore.py,sha256=aemOk5XYw7axQEJQfdABEUxN3img0NYSb8l6W-nDzZY,2090
|
10
|
-
nkululeko/feature_extractor.py,sha256=
|
10
|
+
nkululeko/feature_extractor.py,sha256=h2kMJR6fElshCUXw_A6fg3gNbKRMXrZR4SGhaaQI4wk,6274
|
11
11
|
nkululeko/file_checker.py,sha256=Nw05SIp7Ez1U9ZeFhNGz0XivwKr43hHg1WsfzKsrFPQ,3510
|
12
12
|
nkululeko/filter_data.py,sha256=g7giEShbA-dr2ekVycW5WurFG-UaopJvDZWylKNZtpM,6717
|
13
13
|
nkululeko/glob_conf.py,sha256=2Tl0NZQeVeydDO8T2tuJC2lCv99wvaTVRX9Dy1Mq_L4,237
|
14
14
|
nkululeko/modelrunner.py,sha256=zVDi2-UyjtmU0_Ltf4lnPcECVtukuDVuZaj4pydqOBY,5478
|
15
15
|
nkululeko/nkululeko.py,sha256=O2Zw7u-Mb7VP9MPxAlhdTkXV2lW2kETIuSJp7mfj_Tc,1671
|
16
|
-
nkululeko/plots.py,sha256=
|
16
|
+
nkululeko/plots.py,sha256=hoOFLbWXpV5jGDWHEpy345_4vpaGKGMAv2JwvpNUxkw,11454
|
17
17
|
nkululeko/predict.py,sha256=3ei4wn2by0p9Vkv7cllMcszmEjSM2vX0T6x_5rlgT28,1851
|
18
18
|
nkululeko/reporter.py,sha256=359aeQWt0ZGLseaJnOfafYG8BrwumiM2Q58DWiaoyWQ,10177
|
19
|
+
nkululeko/resample.py,sha256=392zQEpRaWGwBvRBMXY9j2WtTTEaELk8HRPQ6ajzJSQ,2121
|
19
20
|
nkululeko/result.py,sha256=Ey5FPsAyfnQVtzO_J6_4hkOAZ191YWmF_vXxlgNjCdc,406
|
20
21
|
nkululeko/runmanager.py,sha256=ll04dEu5Y1nOi8QOtmSiw3oxzcXeASdQsg0t-vxCny8,6765
|
21
22
|
nkululeko/scaler.py,sha256=6NQHbSQZO9HIfhYNlliuDRywjaEH_FVKHRskTJ2xr90,3021
|
22
|
-
nkululeko/segment.py,sha256=
|
23
|
+
nkululeko/segment.py,sha256=YcrSQ_iP80inPZbXN4jFpKbMmGxI_Xcev5IfK0GSdJ0,4349
|
23
24
|
nkululeko/syllable_nuclei.py,sha256=vK9dj5deqRnyEmlZmhFtKPzqKVGNCgTnWaG8UDITKNg,9913
|
24
25
|
nkululeko/test.py,sha256=BbHGliDChAXqMe2oA579dJpyZSlPGAm5997lX_POboQ,1372
|
25
26
|
nkululeko/test_predictor.py,sha256=QwdAVPHNew9w5PD_sPFhhWVDTYRAbUE6fkAp58X8Hjg,2410
|
26
|
-
nkululeko/util.py,sha256=
|
27
|
-
nkululeko-0.
|
28
|
-
nkululeko-0.
|
29
|
-
nkululeko-0.
|
30
|
-
nkululeko-0.
|
31
|
-
nkululeko-0.
|
27
|
+
nkululeko/util.py,sha256=VE8HbcdUkfHbo1xQNVznyemoE-OVeWgDwsjbZpEBqUA,9840
|
28
|
+
nkululeko-0.62.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
29
|
+
nkululeko-0.62.0.dist-info/METADATA,sha256=lQogxZd1eFD1PEKzwujzUSHQU3sju2IY3v7QQM0ddLE,21828
|
30
|
+
nkululeko-0.62.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
31
|
+
nkululeko-0.62.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
32
|
+
nkululeko-0.62.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|