nkululeko 0.65.2__py3-none-any.whl → 0.65.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +2 -2
- nkululeko/data/dataset.py +5 -2
- nkululeko/data/dataset_csv.py +8 -6
- nkululeko/export.py +35 -19
- nkululeko/feat_extract/feats_wav2vec2.py +1 -1
- nkululeko/plots.py +6 -0
- nkululeko/utils/stats.py +13 -2
- {nkululeko-0.65.2.dist-info → nkululeko-0.65.5.dist-info}/METADATA +14 -1
- {nkululeko-0.65.2.dist-info → nkululeko-0.65.5.dist-info}/RECORD +12 -12
- {nkululeko-0.65.2.dist-info → nkululeko-0.65.5.dist-info}/LICENSE +0 -0
- {nkululeko-0.65.2.dist-info → nkululeko-0.65.5.dist-info}/WHEEL +0 -0
- {nkululeko-0.65.2.dist-info → nkululeko-0.65.5.dist-info}/top_level.txt +0 -0
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.65.2"
|
2
|
-
SAMPLING_RATE=16000
|
1
|
+
VERSION="0.65.5"
|
2
|
+
SAMPLING_RATE=16000
|
nkululeko/data/dataset.py
CHANGED
@@ -159,7 +159,7 @@ class Dataset:
|
|
159
159
|
self.df["duration"] = (end - start).total_seconds()
|
160
160
|
|
161
161
|
# Perform some filtering if desired
|
162
|
-
required =
|
162
|
+
required = self.util.config_val_data(self.name, "required", False)
|
163
163
|
if required:
|
164
164
|
pre = self.df.shape[0]
|
165
165
|
self.df = self.df[self.df[required].notna()]
|
@@ -204,7 +204,10 @@ class Dataset:
|
|
204
204
|
pass
|
205
205
|
try:
|
206
206
|
# try to get the gender values
|
207
|
-
|
207
|
+
if "gender" in source_df:
|
208
|
+
df_local["gender"] = source_df["gender"]
|
209
|
+
else:
|
210
|
+
df_local["gender"] = source_df["sex"]
|
208
211
|
got_gender = True
|
209
212
|
except (KeyError, ValueError, audformat.errors.BadKeyError) as e:
|
210
213
|
pass
|
nkululeko/data/dataset_csv.py
CHANGED
@@ -48,21 +48,23 @@ class Dataset_CSV(Dataset):
|
|
48
48
|
self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
|
49
49
|
if self.is_labeled and not "class_label" in self.df.columns:
|
50
50
|
self.df["class_label"] = self.df[self.target]
|
51
|
-
if "gender" in df.columns:
|
51
|
+
if "gender" in self.df.columns:
|
52
52
|
self.got_gender = True
|
53
|
-
|
53
|
+
elif "sex" in self.df.columns:
|
54
|
+
self.df = self.df.rename(columns={'sex':'gender'})
|
55
|
+
self.got_gender = True
|
56
|
+
if "age" in self.df.columns:
|
54
57
|
self.got_age = True
|
55
|
-
if "speaker" in df.columns:
|
58
|
+
if "speaker" in self.df.columns:
|
56
59
|
self.got_speaker = True
|
57
|
-
ns = df["speaker"].nunique()
|
58
|
-
self.util.debug(f"num of speakers: {ns}")
|
59
60
|
speaker_num = 0
|
60
61
|
if self.got_speaker:
|
61
62
|
speaker_num = self.df.speaker.nunique()
|
62
63
|
self.util.debug(
|
63
64
|
f"Loaded database {self.name} with {df.shape[0]} "
|
64
65
|
f"samples: got targets: {self.got_target}, got speakers: {self.got_speaker} ({speaker_num}), "
|
65
|
-
f"got sexes: {self.got_gender}"
|
66
|
+
f"got sexes: {self.got_gender}, "
|
67
|
+
f"got age: {self.got_age}"
|
66
68
|
)
|
67
69
|
|
68
70
|
def prepare(self):
|
nkululeko/export.py
CHANGED
@@ -10,7 +10,7 @@ import audiofile
|
|
10
10
|
from nkululeko.experiment import Experiment
|
11
11
|
from nkululeko.util import Util
|
12
12
|
from nkululeko.constants import VERSION
|
13
|
-
|
13
|
+
import shutil
|
14
14
|
|
15
15
|
def main(src_dir):
|
16
16
|
parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
|
@@ -54,6 +54,7 @@ def main(src_dir):
|
|
54
54
|
target_root = util.config_val("EXPORT", "root", "./exported_data/")
|
55
55
|
orig_root = util.config_val("EXPORT", "orig_root", None)
|
56
56
|
data_name = util.config_val("EXPORT", "data_name", "export")
|
57
|
+
segments_as_files = eval(util.config_val("EXPORT", "segments_as_files", "False"))
|
57
58
|
audeer.mkdir(target_root)
|
58
59
|
splits = {"train": df_train, "test": df_test}
|
59
60
|
df_all = pd.DataFrame()
|
@@ -61,25 +62,40 @@ def main(src_dir):
|
|
61
62
|
files = []
|
62
63
|
df = splits[split]
|
63
64
|
for idx, (file, start, end) in enumerate(df.index.to_list()):
|
64
|
-
signal, sampling_rate = audiofile.read(
|
65
|
-
file,
|
66
|
-
offset=start.total_seconds(),
|
67
|
-
duration=(end - start).total_seconds(),
|
68
|
-
always_2d=True,
|
69
|
-
)
|
70
65
|
file_dir = os.path.dirname(file)
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
66
|
+
if segments_as_files:
|
67
|
+
signal, sampling_rate = audiofile.read(
|
68
|
+
file,
|
69
|
+
offset=start.total_seconds(),
|
70
|
+
duration=(end - start).total_seconds(),
|
71
|
+
always_2d=True,
|
72
|
+
)
|
73
|
+
file_name = os.path.basename(file)[:-3]+'_'+start.total_seconds()+'.wav'
|
74
|
+
wav_folder = (
|
75
|
+
f"{target_root}/{os.path.basename(os.path.normpath(orig_root))}"
|
76
|
+
)
|
77
|
+
audeer.mkdir(wav_folder)
|
78
|
+
new_rel_path = file_dir[file_dir.index(orig_root)+1+len(orig_root):]
|
79
|
+
new_file_path = f"{wav_folder}/{new_rel_path}"
|
80
|
+
audeer.mkdir(new_file_path)
|
81
|
+
new_file_name = f"{new_file_path}/{file_name}"
|
82
|
+
audiofile.write(new_file_name, signal, sampling_rate)
|
83
|
+
new_file_name = os.path.relpath(new_file_name, target_root)
|
84
|
+
files.append(new_file_name)
|
85
|
+
else:
|
86
|
+
file_name = os.path.basename(file)[:-4]+'_'+str(start.total_seconds())+'.wav'
|
87
|
+
wav_folder = (
|
88
|
+
f"{target_root}/{os.path.basename(os.path.normpath(orig_root))}"
|
89
|
+
)
|
90
|
+
audeer.mkdir(wav_folder)
|
91
|
+
new_rel_path = file_dir[file_dir.index(orig_root)+1+len(orig_root):]
|
92
|
+
new_file_path = f"{wav_folder}/{new_rel_path}"
|
93
|
+
audeer.mkdir(new_file_path)
|
94
|
+
new_file_name = f"{new_file_path}/{file_name}"
|
95
|
+
if not os.path.exists(new_file_name):
|
96
|
+
shutil.copyfile(file, new_file_name)
|
97
|
+
new_file_name = os.path.relpath(new_file_name, target_root)
|
98
|
+
files.append(new_file_name)
|
83
99
|
df = df.set_index(df.index.set_levels(files, level="file"))
|
84
100
|
df["split"] = split
|
85
101
|
df_all = pd.concat([df_all, df])
|
@@ -54,7 +54,7 @@ class Wav2vec2(Featureset):
|
|
54
54
|
signal, sampling_rate = torchaudio.load(file,
|
55
55
|
frame_offset=int(start.total_seconds()*16000),
|
56
56
|
num_frames=int((end - start).total_seconds()*16000))
|
57
|
-
assert sampling_rate == 16000
|
57
|
+
assert sampling_rate == 16000, f"got {sampling_rate} instead of 16000"
|
58
58
|
emb = self.get_embeddings(signal, sampling_rate, file)
|
59
59
|
emb_series[idx] = emb
|
60
60
|
if idx % 10 == 0:
|
nkululeko/plots.py
CHANGED
@@ -57,6 +57,8 @@ class Plots():
|
|
57
57
|
bin_reals = eval(self.util.config_val('EXPL', 'bin_reals', 'True'))
|
58
58
|
for att in attributes:
|
59
59
|
if len(att) == 1:
|
60
|
+
if att[0] not in df:
|
61
|
+
self.util.error(f'unknown feature: {att[0]}')
|
60
62
|
self.util.debug(f'plotting {att[0]}')
|
61
63
|
filename = f'{self.target}-{att[0]}'
|
62
64
|
if self.util.is_categorical(df[att[0]]):
|
@@ -90,6 +92,10 @@ class Plots():
|
|
90
92
|
plt.close(fig)
|
91
93
|
# fig.clear() # avoid error
|
92
94
|
elif len(att) == 2:
|
95
|
+
if att[0] not in df:
|
96
|
+
self.util.error(f'unknown feature: {att[0]}')
|
97
|
+
if att[1] not in df:
|
98
|
+
self.util.error(f'unknown feature: {att[1]}')
|
93
99
|
self.util.debug(f'plotting {att}')
|
94
100
|
att1 = att[0]
|
95
101
|
att2 = att[1]
|
nkululeko/utils/stats.py
CHANGED
@@ -3,6 +3,14 @@ import math
|
|
3
3
|
import numpy as np
|
4
4
|
import pandas as pd
|
5
5
|
|
6
|
+
def check_na(a):
|
7
|
+
if np.isnan(a).any():
|
8
|
+
count = np.count_nonzero(np.isnan(a))
|
9
|
+
print(f'WARNING: got {count} Nans (of {len(a)}), setting to 0')
|
10
|
+
a[np.isnan(a)] = 0
|
11
|
+
return a
|
12
|
+
else:
|
13
|
+
return a
|
6
14
|
|
7
15
|
def cohen_d(d1, d2):
|
8
16
|
"""
|
@@ -13,6 +21,9 @@ def cohen_d(d1, d2):
|
|
13
21
|
Returns:
|
14
22
|
Cohen's d with precision 3
|
15
23
|
"""
|
24
|
+
# Checks:
|
25
|
+
d1 = check_na(d1)
|
26
|
+
d2 = check_na(d2)
|
16
27
|
# calculate the size of samples
|
17
28
|
n1, n2 = len(d1), len(d2)
|
18
29
|
# calculate the variance of the samples
|
@@ -22,8 +33,8 @@ def cohen_d(d1, d2):
|
|
22
33
|
# calculate the means of the samples
|
23
34
|
u1, u2 = np.mean(d1), np.mean(d2)
|
24
35
|
# calculate the effect size
|
25
|
-
if math.isnan(s):
|
26
|
-
return
|
36
|
+
if math.isnan(s) or s == 0:
|
37
|
+
return -1
|
27
38
|
return (int(1000 * np.abs((u1 - u2)) / s)) / 1000
|
28
39
|
|
29
40
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.65.2
|
3
|
+
Version: 0.65.5
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -255,6 +255,19 @@ Nkululeko can be used under the [MIT license](https://choosealicense.com/license
|
|
255
255
|
Changelog
|
256
256
|
=========
|
257
257
|
|
258
|
+
Version 0.65.5
|
259
|
+
--------------
|
260
|
+
* added fill_na in plot effect size
|
261
|
+
|
262
|
+
Version 0.65.4
|
263
|
+
--------------
|
264
|
+
* added datasets to distribution
|
265
|
+
* changes in wav2vec2
|
266
|
+
|
267
|
+
Version 0.65.3
|
268
|
+
--------------
|
269
|
+
* various bugfixes
|
270
|
+
|
258
271
|
Version 0.65.2
|
259
272
|
--------------
|
260
273
|
* fixed bug in dataset.csv that prevented correct paths for relative files
|
@@ -2,19 +2,19 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
|
2
2
|
nkululeko/augment.py,sha256=paq-vEf02XyaPsLjnCbDJbuZind6M6mm0NWAnK5_PKU,1751
|
3
3
|
nkululeko/balancer.py,sha256=64ftZN68sMDfkvuovCDHpAHmSJgCO6Kdk9bwmpSisec,12
|
4
4
|
nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
|
5
|
-
nkululeko/constants.py,sha256=
|
5
|
+
nkululeko/constants.py,sha256=KGYcdgRK9DOOWiYr-901PUaOgmccfRuUhfuYAEyFvZM,36
|
6
6
|
nkululeko/demo.py,sha256=1JFayJBUqOwj8fDtNF6siuB_5aG6dkb_TeIFPhYv1YI,1858
|
7
7
|
nkululeko/demo_predictor.py,sha256=f3tghu3KxrLFPSrMMqT3E_Owy8alAuHEbkoZuahikEE,2310
|
8
8
|
nkululeko/experiment.py,sha256=fV-TdDPlFhS8DdALuzme0n9-3HnGwi_GxiTak3u_ZAY,24779
|
9
9
|
nkululeko/explore.py,sha256=Ok4Cr1llMQhsP4zCJYWfr5I5HNVuWocGcDnE5QfE9a0,2093
|
10
|
-
nkululeko/export.py,sha256=
|
10
|
+
nkululeko/export.py,sha256=p3EalSrFIJCcZVv_630gY-7QW_WDInkrQzsMpYCL5Fs,4525
|
11
11
|
nkululeko/feature_extractor.py,sha256=YRzFJjsE9omCFfr8DNsHj0uiKPd3_NCjHrWpRGmuhmA,6474
|
12
12
|
nkululeko/file_checker.py,sha256=Jo3TN6sc_XJNF8x1UYHqfPyaMTues0iuj6_Sal4x0so,3428
|
13
13
|
nkululeko/filter_data.py,sha256=LD46OLYfA6UuLvFgaA1LvCVSqmi6JXN2xd_fTZdr-ag,7029
|
14
14
|
nkululeko/glob_conf.py,sha256=KOsmB2_9AVoKNyhtCkaamj-ZYCme6-NjTYF-4wlLgOY,241
|
15
15
|
nkululeko/modelrunner.py,sha256=NYdV4z9TKhtC9LGhOxXa7aBC2AxRjnFJ42Ah01rsbqg,6025
|
16
16
|
nkululeko/nkululeko.py,sha256=SVOY3CPvlmG-16kqV8YOvR2HYCgHkaiVo3GBiwu38W0,1681
|
17
|
-
nkululeko/plots.py,sha256=
|
17
|
+
nkululeko/plots.py,sha256=wwUvHIlpP6wYlukYNWtgqhxh6avqo_okslNgoT4S6u0,13023
|
18
18
|
nkululeko/predict.py,sha256=3ei4wn2by0p9Vkv7cllMcszmEjSM2vX0T6x_5rlgT28,1851
|
19
19
|
nkululeko/reporter.py,sha256=359aeQWt0ZGLseaJnOfafYG8BrwumiM2Q58DWiaoyWQ,10177
|
20
20
|
nkululeko/resample.py,sha256=NDZLIhmGPcxSZwzIZul9oeNfbaHfAmzjTvRWs6isIr4,2120
|
@@ -44,8 +44,8 @@ nkululeko/autopredict/ap_stoi.py,sha256=sx2KSGSjCsjgUvZPFlKALhbZlVg2aTdGmvil-iNS
|
|
44
44
|
nkululeko/autopredict/ap_valence.py,sha256=cyFrKRy68EU5gDiVg0_HxiwaKGbAJ9UuwuiXgVTalAU,997
|
45
45
|
nkululeko/autopredict/estimate_snr.py,sha256=m3JZSRGXr2M9yVgovTSZ63rpho9hIUfHOXfAVRAHf1k,4186
|
46
46
|
nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
47
|
-
nkululeko/data/dataset.py,sha256=
|
48
|
-
nkululeko/data/dataset_csv.py,sha256=
|
47
|
+
nkululeko/data/dataset.py,sha256=3h3gL_zxWjkYdujETxJRJhqbFPNlEbG6iaPyr8CQ-oc,21245
|
48
|
+
nkululeko/data/dataset_csv.py,sha256=YUAeTqCK-aaoRMTzYpMRB00J5E5P8BRH7Jve4RfEzvU,2818
|
49
49
|
nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
50
50
|
nkululeko/feat_extract/feats_agender.py,sha256=ZlfYbpL65SOTLsoqkcNsREa-EyHpigNf8LlFErQH9Ck,2818
|
51
51
|
nkululeko/feat_extract/feats_agender_agender.py,sha256=zMY3GucOOx-aBav_9bUSdU63rc2VlFxVZHuVqUKQ6Dk,2904
|
@@ -63,7 +63,7 @@ nkululeko/feat_extract/feats_praat.py,sha256=VU9hwL0_1XJQW32CYaW0bqTbl9MryQ1hME9
|
|
63
63
|
nkululeko/feat_extract/feats_snr.py,sha256=UHCNgdZj8g69rj-EyujC1gX-BgYkuIoIm63ZR7Xb78w,2790
|
64
64
|
nkululeko/feat_extract/feats_squim.py,sha256=8AUNfjWf2MHCDIH0by543fwTVFSybBTYWdQpIlZIMi0,4359
|
65
65
|
nkululeko/feat_extract/feats_trill.py,sha256=8BI_wL7YIwaKWTZ8DnLC5RUZ5HVXdLzLXs3QgNexXV8,2958
|
66
|
-
nkululeko/feat_extract/feats_wav2vec2.py,sha256=
|
66
|
+
nkululeko/feat_extract/feats_wav2vec2.py,sha256=48_WK_JmUq0Gda800fJh3RadTxkHeCpOSTBsPIykEjw,4462
|
67
67
|
nkululeko/feat_extract/feats_wavlm.py,sha256=06vRILRLG2-OzsLx-tuwOdtMhNtoPSxJwAE5B82upjk,4466
|
68
68
|
nkululeko/feat_extract/featureset.py,sha256=7l1qC_nVGyyuYOdKJoS5hY673Fkm3Wpqtdu5mxtBP0E,1369
|
69
69
|
nkululeko/feat_extract/feinberg_praat.py,sha256=fcp2863g5NLk7p27HlRISpTOVTaoxcrSGOST4mK5VXk,20496
|
@@ -90,9 +90,9 @@ nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=pmLHuXsaqvcdYxB4PSW9l1mbQW
|
|
90
90
|
nkululeko/segmenting/seg_silero.py,sha256=bbLQxUste1lKEwNRHG4wOTCaFMevNt8TOe2DfmLtu5w,3306
|
91
91
|
nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
92
92
|
nkululeko/utils/files.py,sha256=82EsGvTN85lDdSzetm1ZHTVOUm0AcfQhTeurwsfHN_Q,3547
|
93
|
-
nkululeko/utils/stats.py,sha256=
|
94
|
-
nkululeko-0.65.
|
95
|
-
nkululeko-0.65.
|
96
|
-
nkululeko-0.65.
|
97
|
-
nkululeko-0.65.
|
98
|
-
nkululeko-0.65.
|
93
|
+
nkululeko/utils/stats.py,sha256=nRuZEbHpeRTAcvW8b12ZWi_kDnpC5Ztrmvy0_J621cE,2241
|
94
|
+
nkululeko-0.65.5.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
95
|
+
nkululeko-0.65.5.dist-info/METADATA,sha256=y2ttA1LEwXgflc2MLb1krzgq3zazj780wZ3fIWR8fAg,23458
|
96
|
+
nkululeko-0.65.5.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
97
|
+
nkululeko-0.65.5.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
98
|
+
nkululeko-0.65.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|