nkululeko 0.91.1__py3-none-any.whl → 0.91.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/autopredict/ap_sid.py +43 -0
- nkululeko/constants.py +1 -1
- nkululeko/data/dataset_csv.py +7 -1
- nkululeko/experiment.py +9 -1
- nkululeko/plots.py +3 -0
- nkululeko/segment.py +1 -1
- nkululeko/test_pretrain.py +0 -1
- {nkululeko-0.91.1.dist-info → nkululeko-0.91.3.dist-info}/METADATA +12 -3
- {nkululeko-0.91.1.dist-info → nkululeko-0.91.3.dist-info}/RECORD +13 -12
- {nkululeko-0.91.1.dist-info → nkululeko-0.91.3.dist-info}/WHEEL +1 -1
- {nkululeko-0.91.1.dist-info → nkululeko-0.91.3.dist-info}/LICENSE +0 -0
- {nkululeko-0.91.1.dist-info → nkululeko-0.91.3.dist-info}/entry_points.txt +0 -0
- {nkululeko-0.91.1.dist-info → nkululeko-0.91.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,43 @@
|
|
1
|
+
""""
|
2
|
+
A predictor for sid - Speaker ID.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from pyannote.audio import Pipeline
|
6
|
+
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
|
10
|
+
import nkululeko.glob_conf as glob_conf
|
11
|
+
from nkululeko.feature_extractor import FeatureExtractor
|
12
|
+
from nkululeko.utils.util import Util
|
13
|
+
|
14
|
+
|
15
|
+
class SIDPredictor:
|
16
|
+
"""SIDPredictor.
|
17
|
+
|
18
|
+
predicting speaker id.
|
19
|
+
"""
|
20
|
+
|
21
|
+
def __init__(self, df):
|
22
|
+
self.df = df
|
23
|
+
self.util = Util("sidPredictor")
|
24
|
+
self.pipeline = Pipeline.from_pretrained(
|
25
|
+
"pyannote/speaker-diarization-3.1",
|
26
|
+
use_auth_token="HUGGINGFACE_ACCESS_TOKEN_GOES_HERE",
|
27
|
+
)
|
28
|
+
|
29
|
+
def predict(self, split_selection):
|
30
|
+
self.util.debug(f"estimating PESQ for {split_selection} samples")
|
31
|
+
return_df = self.df.copy()
|
32
|
+
feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
|
33
|
+
self.feature_extractor = FeatureExtractor(
|
34
|
+
self.df, ["squim"], feats_name, split_selection
|
35
|
+
)
|
36
|
+
result_df = self.feature_extractor.extract()
|
37
|
+
# replace missing values by 0
|
38
|
+
result_df = result_df.fillna(0)
|
39
|
+
result_df = result_df.replace(np.nan, 0)
|
40
|
+
result_df.replace([np.inf, -np.inf], 0, inplace=True)
|
41
|
+
pred_vals = result_df.pesq * 100
|
42
|
+
return_df["pesq_pred"] = pred_vals.astype("int") / 100
|
43
|
+
return return_df
|
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.91.1"
|
1
|
+
VERSION="0.91.3"
|
2
2
|
SAMPLING_RATE = 16000
|
nkululeko/data/dataset_csv.py
CHANGED
@@ -39,6 +39,8 @@ class Dataset_CSV(Dataset):
|
|
39
39
|
df = audformat.utils.read_csv(data_file)
|
40
40
|
if isinstance(df, pd.Series):
|
41
41
|
df = df.to_frame()
|
42
|
+
elif isinstance(df, pd.Index):
|
43
|
+
df = pd.DataFrame(index=df)
|
42
44
|
rename_cols = self.util.config_val_data(self.name, "colnames", False)
|
43
45
|
if rename_cols:
|
44
46
|
col_dict = ast.literal_eval(rename_cols)
|
@@ -78,7 +80,11 @@ class Dataset_CSV(Dataset):
|
|
78
80
|
|
79
81
|
self.df = df
|
80
82
|
self.db = None
|
81
|
-
self.
|
83
|
+
target = self.util.config_val("DATA", "target", None)
|
84
|
+
if target is not None:
|
85
|
+
self.got_target = True
|
86
|
+
else:
|
87
|
+
self.got_target = False
|
82
88
|
self.is_labeled = self.got_target
|
83
89
|
self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
|
84
90
|
is_index = False
|
nkululeko/experiment.py
CHANGED
@@ -125,7 +125,15 @@ class Experiment:
|
|
125
125
|
# df = pd.read_csv(storage, header=0, index_col=[0,1,2])
|
126
126
|
# df.index.set_levels(pd.to_timedelta(df.index.levels[1]), level=1)
|
127
127
|
# df.index.set_levels(pd.to_timedelta(df.index.levels[2]), level=2)
|
128
|
-
|
128
|
+
try:
|
129
|
+
df = audformat.utils.read_csv(storage)
|
130
|
+
except ValueError:
|
131
|
+
# split might be empty
|
132
|
+
return pd.DataFrame()
|
133
|
+
if isinstance(df, pd.Series):
|
134
|
+
df = df.to_frame()
|
135
|
+
elif isinstance(df, pd.Index):
|
136
|
+
df = pd.DataFrame(index=df)
|
129
137
|
df.is_labeled = True if self.target in df else False
|
130
138
|
# print(df.head())
|
131
139
|
return df
|
nkululeko/plots.py
CHANGED
@@ -305,6 +305,9 @@ class Plots:
|
|
305
305
|
except AttributeError as ae:
|
306
306
|
self.util.warn(ae)
|
307
307
|
ax = sns.histplot(df, x="duration", kde=True)
|
308
|
+
except ValueError as error:
|
309
|
+
self.util.warn(error)
|
310
|
+
ax = sns.histplot(df, x="duration", kde=True)
|
308
311
|
min = self.util.to_3_digits(df.duration.min())
|
309
312
|
max = self.util.to_3_digits(df.duration.max())
|
310
313
|
title = f"Duration distr. for {sample_selection} {df.shape[0]}. min={min}, max={max}"
|
nkululeko/segment.py
CHANGED
@@ -110,7 +110,7 @@ def main():
|
|
110
110
|
)
|
111
111
|
print("")
|
112
112
|
# remove encoded labels
|
113
|
-
target = util.config_val("DATA", "target",
|
113
|
+
target = util.config_val("DATA", "target", None)
|
114
114
|
if "class_label" in df_seg.columns:
|
115
115
|
df_seg = df_seg.drop(columns=[target])
|
116
116
|
df_seg = df_seg.rename(columns={"class_label": target})
|
nkululeko/test_pretrain.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.91.1
|
3
|
+
Version: 0.91.3
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -200,7 +200,7 @@ All of them take *--config <my_config.ini>* as an argument.
|
|
200
200
|
|
201
201
|
* **nkululeko.nkululeko**: do machine learning experiments combining features and learners
|
202
202
|
* **nkululeko.ensemble**: [combine several nkululeko experiments](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/) and report on late fusion results
|
203
|
-
*
|
203
|
+
* *--config*: which experiments (INI files) to combine
|
204
204
|
* *--method* (optional): majority_voting, mean (default), max, sum, uncertainty, uncertainty_weighted, confidence_weighted, performance_weighted
|
205
205
|
* *--threshold*: uncertainty threshold (1.0 means no threshold)
|
206
206
|
* *--weights*: weights for performance_weighted method (could be from previous UAR, ACC)
|
@@ -261,7 +261,7 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
|
|
261
261
|
* [Predict new labels for your data from public models and check bias](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/)
|
262
262
|
* [Resample](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/)
|
263
263
|
* [Get some statistics on correlation and effect-size](http://blog.syntheticspeech.de/2023/09/05/nkululeko-get-some-statistics-on-correlation-and-effect-size/)
|
264
|
-
* [Automatic generation
|
264
|
+
* [Automatic generation of a latex / pdf report](http://blog.syntheticspeech.de/2023/09/26/nkululeko-generate-a-latex-pdf-report/)
|
265
265
|
* [Inspect your data with Spotlight](http://blog.syntheticspeech.de/2023/10/31/nkululeko-inspect-your-data-with-spotlight/)
|
266
266
|
* [Automatically stratify your split sets](http://blog.syntheticspeech.de/2023/11/07/nkululeko-automatically-stratify-your-split-sets/)
|
267
267
|
* [re-name data column names](http://blog.syntheticspeech.de/2023/11/16/nkululeko-re-name-data-column-names/)
|
@@ -355,6 +355,15 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
|
|
355
355
|
Changelog
|
356
356
|
=========
|
357
357
|
|
358
|
+
Version 0.91.3
|
359
|
+
--------------
|
360
|
+
* some additions for robustness
|
361
|
+
|
362
|
+
Version 0.91.2
|
363
|
+
--------------
|
364
|
+
* making lint work by excluding constants from check
|
365
|
+
|
366
|
+
Version 0.91.1
|
358
367
|
--------------
|
359
368
|
* minor refactoring in ensemble module
|
360
369
|
|
@@ -2,13 +2,13 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
|
2
2
|
nkululeko/aug_train.py,sha256=FoMbBrfyOZd4QAw7oIHl3X6-UpsqAKWVDIolCA7qOWs,3196
|
3
3
|
nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
|
4
4
|
nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
|
5
|
-
nkululeko/constants.py,sha256=
|
5
|
+
nkululeko/constants.py,sha256=tT5Su7q7ufkiVtsEMOV5FgJVkE2U8idmrVxrNi4gFmc,39
|
6
6
|
nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
|
7
7
|
nkululeko/demo.py,sha256=4Yzhg6pCPBYPGJrP7JX2TysVosl_R1llpVDKc2P_gUA,4955
|
8
8
|
nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
|
9
9
|
nkululeko/demo_predictor.py,sha256=lDF-xOxRdEAclOmbepAYg-BQXQdGkHfq2n74PTIoop8,4872
|
10
10
|
nkululeko/ensemble.py,sha256=71V-rre61H3J4sh7lu-OTo4I2_g7mm_rQxwW1ARDHgY,12782
|
11
|
-
nkululeko/experiment.py,sha256=
|
11
|
+
nkululeko/experiment.py,sha256=kRz3diGyupXneXFBLmmUm6BV-sGSqQJv44Zojn0Bhcs,31649
|
12
12
|
nkululeko/explore.py,sha256=Y5lPPychnI-7fyP8zvwVb9P09fvprbUPOofOppuABYQ,3658
|
13
13
|
nkululeko/export.py,sha256=U-V4acxtuL6qKt6oAsVcM5TTeWogYUJ3GU-lA6rq6d4,4336
|
14
14
|
nkululeko/feature_extractor.py,sha256=UnspIWz3XrNhKnBBhWZkH2bHvD-sROtrQVqB1JvkUyw,4088
|
@@ -20,16 +20,16 @@ nkululeko/modelrunner.py,sha256=lJy-xM4QfDDWeL0dLTE_VIb4sYrnd_Z_yJRK3wwohQA,1119
|
|
20
20
|
nkululeko/multidb.py,sha256=sO6OwJn8sn1-C-ig3thsIL8QMWHdV9SnJhDodKjeKrI,6876
|
21
21
|
nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
|
22
22
|
nkululeko/nkululeko.py,sha256=M7baIq2nAoi6dEoBL4ATEuqAs5U1fvl_hyqAl5DybAQ,2040
|
23
|
-
nkululeko/plots.py,sha256=
|
23
|
+
nkululeko/plots.py,sha256=sR061gOsyvuh8UBYS52FINSal4CYNQgvq3B4WOSimDw,23092
|
24
24
|
nkululeko/predict.py,sha256=b35YOqovGb5PLDz0nDuhJGykEAPq2Y45R9lzxJZMuMU,2083
|
25
25
|
nkululeko/resample.py,sha256=akSAjJ3qn-O5NAyLJHVHdsK7MUZPGaZUvM2TwMSmj2M,5194
|
26
26
|
nkululeko/runmanager.py,sha256=AswmORVUkCIH0gTx6zEyufvFATQBS8C5TXo2erSNdVg,7611
|
27
27
|
nkululeko/scaler.py,sha256=7VOZ4sREMoQtahfETt9RyuR29Fb7PCwxlYVjBbdCVFc,4101
|
28
|
-
nkululeko/segment.py,sha256=
|
28
|
+
nkululeko/segment.py,sha256=PAc5sVLoqKzOVENd9A5ybaKrJOvD9WEPwDdJGTv6OIM,4319
|
29
29
|
nkululeko/syllable_nuclei.py,sha256=5w_naKxNxz66a_qLkraemi2fggM-gWesiiBPS47iFcE,9931
|
30
30
|
nkululeko/test.py,sha256=1w624vo5KTzmFC8BUStGlLDmIEAFuJUz7J0W-gp7AxI,1677
|
31
31
|
nkululeko/test_predictor.py,sha256=DEHE_D3A6m6KJTrpDKceA1n655t_UZV3WQd57K4a3Ho,2863
|
32
|
-
nkululeko/test_pretrain.py,sha256=
|
32
|
+
nkululeko/test_pretrain.py,sha256=6FZeETlWzg9Cq_sn3BFKhfH91jW26nAIDm1bJkInNNA,8463
|
33
33
|
nkululeko/augmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
34
34
|
nkululeko/augmenting/augmenter.py,sha256=TUUznEz0pe9DSMC9r7LoBckuvsJTprvypeV5-8zLn20,2846
|
35
35
|
nkululeko/augmenting/randomsplicer.py,sha256=TKPqp8np5dvyJIAjOTvtlanatFQ9OwKxZ02QoCwZ2Jw,2802
|
@@ -43,13 +43,14 @@ nkululeko/autopredict/ap_gender.py,sha256=b6oTqHKVwOnYh4YlKbuMflssS4HJqs_c1ayusa
|
|
43
43
|
nkululeko/autopredict/ap_mos.py,sha256=e4hmgb0Yf1_AbC5P0CqXJIvufjhbTrqmI5goARxrY0Y,1107
|
44
44
|
nkululeko/autopredict/ap_pesq.py,sha256=mRt3Loucaoy4vJxwfuxUt0fP88bMGvkmrLCEpKEXWp0,1140
|
45
45
|
nkululeko/autopredict/ap_sdr.py,sha256=VQ2UkxOO3ipqYNNjFwKgEaGCk8IzLI5lX_2tZFLIvTY,1188
|
46
|
+
nkululeko/autopredict/ap_sid.py,sha256=1TYJg-Bg_LJGPzIWF-oYtXmD5Otvi0fW_f8uzEVZY5g,1309
|
46
47
|
nkululeko/autopredict/ap_snr.py,sha256=AiTU8-7CMEeowmYkMO19lw1HCb1yTXC6KeulNf8gOqw,1110
|
47
48
|
nkululeko/autopredict/ap_stoi.py,sha256=UEQg1ZV0meAsxgdWB8ieRs9GPXHqArmsaOyCGRwpcnA,1187
|
48
49
|
nkululeko/autopredict/ap_valence.py,sha256=WrW4Ltqi_odW49_4QEVKkfnrcztLIVZ4cXIEHu4dBN8,1026
|
49
50
|
nkululeko/autopredict/estimate_snr.py,sha256=1k9-XadABudnsNOeFZD_Fg0E64-GUQVS7JEp82MLQS4,4995
|
50
51
|
nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
51
52
|
nkululeko/data/dataset.py,sha256=Hz2IOsdcESG-P3aP7r4d1xj_gIP6fyGCYOwukoQ7SM8,29321
|
52
|
-
nkululeko/data/dataset_csv.py,sha256=
|
53
|
+
nkululeko/data/dataset_csv.py,sha256=mb7FpHOmJHxpt1QYVBKveyqJN3MUt30TRfwwJw0TT5c,4816
|
53
54
|
nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
54
55
|
nkululeko/feat_extract/feats_agender.py,sha256=onfAQ6-xx_mFMJXEF1IX8cHBmGtGeX6weJmxbkfh1_o,3184
|
55
56
|
nkululeko/feat_extract/feats_agender_agender.py,sha256=_YQv1qw--3uQfnyTQDCwlmPRnrhdMhgXbYK2yQtseW0,3464
|
@@ -110,9 +111,9 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
110
111
|
nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
|
111
112
|
nkululeko/utils/stats.py,sha256=vCRzhCR0Gx5SiJyAGbj1TIto8ocGz58CM5Pr3LltagA,2948
|
112
113
|
nkululeko/utils/util.py,sha256=XFZdhCc_LM4EmoZ5tKKaBCQLXclcNmvHwhfT_CXB98c,16723
|
113
|
-
nkululeko-0.91.
|
114
|
-
nkululeko-0.91.
|
115
|
-
nkululeko-0.91.
|
116
|
-
nkululeko-0.91.
|
117
|
-
nkululeko-0.91.
|
118
|
-
nkululeko-0.91.
|
114
|
+
nkululeko-0.91.3.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
115
|
+
nkululeko-0.91.3.dist-info/METADATA,sha256=FI1hcOEFJbfAHNQi1SCFhPv7sC06vdWa75lIw2Ix4YA,41584
|
116
|
+
nkululeko-0.91.3.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
117
|
+
nkululeko-0.91.3.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
|
118
|
+
nkululeko-0.91.3.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
119
|
+
nkululeko-0.91.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|