nkululeko 0.88.12__py3-none-any.whl → 0.89.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +1 -1
- nkululeko/ensemble.py +9 -1
- nkululeko/feat_extract/feats_analyser.py +7 -4
- nkululeko/models/model_tree.py +3 -1
- nkululeko/reporting/reporter.py +18 -0
- nkululeko/utils/util.py +1 -1
- {nkululeko-0.88.12.dist-info → nkululeko-0.89.1.dist-info}/METADATA +18 -12
- {nkululeko-0.88.12.dist-info → nkululeko-0.89.1.dist-info}/RECORD +11 -11
- {nkululeko-0.88.12.dist-info → nkululeko-0.89.1.dist-info}/WHEEL +1 -1
- {nkululeko-0.88.12.dist-info → nkululeko-0.89.1.dist-info}/LICENSE +0 -0
- {nkululeko-0.88.12.dist-info → nkululeko-0.89.1.dist-info}/top_level.txt +0 -0
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.88.12"
|
1
|
+
VERSION="0.89.1"
|
2
2
|
SAMPLING_RATE = 16000
|
nkululeko/ensemble.py
CHANGED
@@ -26,7 +26,11 @@ from pathlib import Path
|
|
26
26
|
|
27
27
|
import numpy as np
|
28
28
|
import pandas as pd
|
29
|
-
from sklearn.metrics import
|
29
|
+
from sklearn.metrics import(
|
30
|
+
balanced_accuracy_score,
|
31
|
+
classification_report,
|
32
|
+
f1_score
|
33
|
+
)
|
30
34
|
|
31
35
|
from nkululeko.constants import VERSION
|
32
36
|
from nkululeko.experiment import Experiment
|
@@ -284,6 +288,10 @@ def ensemble_predictions(
|
|
284
288
|
predicted = ensemble_preds["predicted"]
|
285
289
|
uar = balanced_accuracy_score(truth, predicted)
|
286
290
|
acc = (truth == predicted).mean()
|
291
|
+
# print classification report
|
292
|
+
Util("ensemble").debug(f"\n {classification_report(truth, predicted)}")
|
293
|
+
# f1 = f1_score(truth, predicted, pos_label='p')
|
294
|
+
# Util("ensemble").debug(f"F1: {f1:.3f}")
|
287
295
|
Util("ensemble").debug(f"{method}: UAR: {uar:.3f}, ACC: {acc:.3f}")
|
288
296
|
|
289
297
|
return ensemble_preds
|
@@ -139,7 +139,7 @@ class FeatureAnalyser:
|
|
139
139
|
elif model_s == "svm":
|
140
140
|
from sklearn.svm import SVC
|
141
141
|
|
142
|
-
c = float(self.util.config_val("MODEL", "C_val", "0
|
142
|
+
c = float(self.util.config_val("MODEL", "C_val", "1.0"))
|
143
143
|
model = SVC(kernel="linear", C=c, gamma="scale")
|
144
144
|
result_importances[model_s] = self._get_importance(
|
145
145
|
model, permutation
|
@@ -205,7 +205,7 @@ class FeatureAnalyser:
|
|
205
205
|
model, permutation
|
206
206
|
)
|
207
207
|
elif model_s == "xgr":
|
208
|
-
from xgboost import
|
208
|
+
from xgboost import XGBRegressor
|
209
209
|
|
210
210
|
model = XGBRegressor()
|
211
211
|
result_importances[model_s] = self._get_importance(
|
@@ -270,12 +270,14 @@ class FeatureAnalyser:
|
|
270
270
|
)
|
271
271
|
)
|
272
272
|
|
273
|
+
# print feature importance values to file and debug and save to result
|
274
|
+
self.util.debug(f"Importance features from {model_name}: features = \n{df_imp['feats'].values.tolist()}")
|
273
275
|
# result file
|
274
276
|
res_dir = self.util.get_path("res_dir")
|
275
277
|
filename = f"_EXPL_{model_name}"
|
276
278
|
if permutation:
|
277
279
|
filename += "_perm"
|
278
|
-
filename = f"{res_dir}{self.util.get_exp_name(only_data=True)}{filename}_{
|
280
|
+
filename = f"{res_dir}{self.util.get_exp_name(only_data=True)}{filename}_{max_feat_num}_fi.txt"
|
279
281
|
with open(filename, "w") as text_file:
|
280
282
|
text_file.write(
|
281
283
|
"features in order of decreasing importance according to model"
|
@@ -283,7 +285,8 @@ class FeatureAnalyser:
|
|
283
285
|
)
|
284
286
|
|
285
287
|
df_imp.to_csv(filename, mode="a")
|
286
|
-
|
288
|
+
self.util.debug(f"Saved feature importance values to {filename}")
|
289
|
+
|
287
290
|
# check if feature distributions should be plotted
|
288
291
|
plot_feats = self.util.config_val("EXPL", "feature_distributions", False)
|
289
292
|
if plot_feats:
|
nkululeko/models/model_tree.py
CHANGED
@@ -12,4 +12,6 @@ class Tree_model(Model):
|
|
12
12
|
def __init__(self, df_train, df_test, feats_train, feats_test):
|
13
13
|
super().__init__(df_train, df_test, feats_train, feats_test)
|
14
14
|
self.name = "tree"
|
15
|
-
self.clf = DecisionTreeClassifier(
|
15
|
+
self.clf = DecisionTreeClassifier(
|
16
|
+
random_state=42
|
17
|
+
) # set up the classifier
|
nkululeko/reporting/reporter.py
CHANGED
@@ -27,6 +27,7 @@ from sklearn.metrics import (
|
|
27
27
|
r2_score,
|
28
28
|
roc_auc_score,
|
29
29
|
roc_curve,
|
30
|
+
RocCurveDisplay,
|
30
31
|
)
|
31
32
|
|
32
33
|
import nkululeko.glob_conf as glob_conf
|
@@ -75,6 +76,7 @@ class Reporter:
|
|
75
76
|
self.result = Result(0, 0, 0, 0, "unknown")
|
76
77
|
self.run = run
|
77
78
|
self.epoch = epoch
|
79
|
+
self.model_type = self.util.get_model_type()
|
78
80
|
self._set_metric()
|
79
81
|
self.filenameadd = ""
|
80
82
|
self.cont_to_cat = False
|
@@ -387,6 +389,7 @@ class Reporter:
|
|
387
389
|
epoch = self.epoch
|
388
390
|
"""Print all evaluation values to text file."""
|
389
391
|
res_dir = self.util.get_path("res_dir")
|
392
|
+
fig_dir = self.util.get_path("fig_dir")
|
390
393
|
file_name = f"{res_dir}{self.util.get_exp_name()}_{epoch}{self.filenameadd}.txt"
|
391
394
|
if self.util.exp_is_classification():
|
392
395
|
labels = glob_conf.labels
|
@@ -397,6 +400,10 @@ class Reporter:
|
|
397
400
|
target_names=labels,
|
398
401
|
output_dict=True,
|
399
402
|
)
|
403
|
+
# print classification report in console
|
404
|
+
self.util.debug(
|
405
|
+
f"\n {classification_report(self.truths, self.preds, target_names=labels)}"
|
406
|
+
)
|
400
407
|
except ValueError as e:
|
401
408
|
self.util.debug(
|
402
409
|
"Reporter: caught a ValueError when trying to get"
|
@@ -415,6 +422,17 @@ class Reporter:
|
|
415
422
|
if len(np.unique(self.truths)) == 2:
|
416
423
|
fpr, tpr, _ = roc_curve(self.truths, self.preds)
|
417
424
|
auc_score = auc(fpr, tpr)
|
425
|
+
display = RocCurveDisplay(
|
426
|
+
fpr=fpr,
|
427
|
+
tpr=tpr,
|
428
|
+
roc_auc=auc_score,
|
429
|
+
estimator_name=f"{self.model_type} estimator",
|
430
|
+
)
|
431
|
+
# save plot
|
432
|
+
plot_path = f"{fig_dir}{self.util.get_exp_name()}_{epoch}{self.filenameadd}_roc.{self.format}"
|
433
|
+
display.plot(ax=None)
|
434
|
+
plt.savefig(plot_path)
|
435
|
+
self.util.debug(f"Saved ROC curve to {plot_path}")
|
418
436
|
pauc_score = roc_auc_score(self.truths, self.preds, max_fpr=0.1)
|
419
437
|
auc_pauc = f"auc: {auc_score:.3f}, pauc: {pauc_score:.3f} from epoch: {epoch}"
|
420
438
|
self.util.debug(auc_pauc)
|
nkululeko/utils/util.py
CHANGED
@@ -175,7 +175,7 @@ class Util:
|
|
175
175
|
"""Get the experiment directory."""
|
176
176
|
root = os.path.join(self.config["EXP"]["root"], "")
|
177
177
|
name = self.config["EXP"]["name"]
|
178
|
-
dir_name = f"{root}{name}"
|
178
|
+
dir_name = f"{root}/{name}"
|
179
179
|
audeer.mkdir(dir_name)
|
180
180
|
return dir_name
|
181
181
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.88.12
|
3
|
+
Version: 0.89.1
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -204,7 +204,7 @@ All of them take *--config <my_config.ini>* as an argument.
|
|
204
204
|
* *configurations*: which experiments to combine
|
205
205
|
* *--method* (optional): majority_voting, mean (default), max, sum, uncertainty, uncertainty_weighted, confidence_weighted, performance_weighted
|
206
206
|
* *--threshold*: uncertainty threshold (1.0 means no threshold)
|
207
|
-
* *--
|
207
|
+
* *--weights*: weights for performance_weighted method (could be from previous UAR, ACC)
|
208
208
|
* *--outfile* (optional): name of CSV file for output (default: ensemble_result.csv)
|
209
209
|
* *--no_labels* (optional): indicate that no ground truth is given
|
210
210
|
* **nkululeko.multidb**: do [multiple experiments](http://blog.syntheticspeech.de/2024/01/02/nkululeko-compare-several-databases/), comparing several databases cross and in itself
|
@@ -220,14 +220,11 @@ All of them take *--config <my_config.ini>* as an argument.
|
|
220
220
|
* **nkululeko.predict**: [predict features](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/) like SNR, MOS, arousal/valence, age/gender, with DNN models
|
221
221
|
* **nkululeko.segment**: [segment a database](http://blog.syntheticspeech.de/2023/07/14/nkululeko-segmenting-a-database/) based on VAD (voice activity detection)
|
222
222
|
* **nkululeko.resample**: check on all [sampling rates and change](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/) to 16kHz
|
223
|
-
* **nkululeko.nkuluflag**: a convenient module to specify configuration parameters on the command-line.
|
224
|
-
* usage: nkuluflag.py [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET]
|
225
|
-
[--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
223
|
+
* **nkululeko.nkuluflag**: a convenient module to specify configuration parameters on the command-line. Usage:
|
230
224
|
|
225
|
+
```bash
|
226
|
+
$ python -m nkululeko.nkuluflag [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET] [--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
|
227
|
+
```
|
231
228
|
|
232
229
|
There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
|
233
230
|
* [Introduction](http://blog.syntheticspeech.de/2021/08/04/machine-learning-experiment-framework/)
|
@@ -359,8 +356,17 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
|
|
359
356
|
Changelog
|
360
357
|
=========
|
361
358
|
|
362
|
-
Version 0.88.12
|
359
|
+
Version 0.89.1
|
363
360
|
--------------
|
361
|
+
* print and save result of feature importance
|
362
|
+
|
363
|
+
Version 0.89.0
|
364
|
+
--------------
|
365
|
+
* added ROC plots and classification report on Debug
|
366
|
+
|
367
|
+
|
368
|
+
Version 0.88.12
|
369
|
+
---------------
|
364
370
|
* added n_jobs for sklearn processing
|
365
371
|
* re_named num_workers n_jobs
|
366
372
|
|
@@ -833,9 +839,9 @@ Version 0.66.3
|
|
833
839
|
|
834
840
|
Version 0.66.2
|
835
841
|
--------------
|
836
|
-
* enabled data-
|
842
|
+
* enabled data-patches with quotes
|
837
843
|
* enabled missing category labels
|
838
|
-
* used
|
844
|
+
* used tqdm for progress display
|
839
845
|
|
840
846
|
Version 0.66.1
|
841
847
|
--------------
|
@@ -2,11 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
|
2
2
|
nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
|
3
3
|
nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
|
4
4
|
nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
|
5
|
-
nkululeko/constants.py,sha256=
|
5
|
+
nkululeko/constants.py,sha256=nRA0bWrvi-5tXm8QWv4dzDE-3sujMiz26U4QgSVuck0,39
|
6
6
|
nkululeko/demo.py,sha256=bLuHkeEl5rOfm7ecGHCcWATiPK7-njNbtrGljxzNzFs,5088
|
7
7
|
nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
|
8
8
|
nkululeko/demo_predictor.py,sha256=zs1bjhpnKuNCPLJeiyDm19ME1NEDOQT3QNeyVKJq9Yc,4882
|
9
|
-
nkululeko/ensemble.py,sha256=
|
9
|
+
nkululeko/ensemble.py,sha256=egtOFxEp7gjuM5cKBfETnhTn1-7_4zWBPEah65K1C3U,12927
|
10
10
|
nkululeko/experiment.py,sha256=L4PzoScPLG2xTyniVy9evcBy_8CIe3RTeTEUVTqiuvQ,31186
|
11
11
|
nkululeko/explore.py,sha256=_GOgcRaPvh2xBbKPAkSJjYzgHhD_xb3ZCB6M1MPA6ao,3867
|
12
12
|
nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
|
@@ -51,7 +51,7 @@ nkululeko/data/dataset_csv.py,sha256=UGEpi__eT2KFS6Fop6N4HkMrzO-u5VP71gt44kwZavo
|
|
51
51
|
nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
52
52
|
nkululeko/feat_extract/feats_agender.py,sha256=tMK3_qs8adylNNSR0CS1RjU9RxmpumLqmuyzmc2ZYjA,3184
|
53
53
|
nkululeko/feat_extract/feats_agender_agender.py,sha256=19NoRT0KJ8WoZ3EabTYexXymD7bDy58-H20jYmdqjD0,3498
|
54
|
-
nkululeko/feat_extract/feats_analyser.py,sha256=
|
54
|
+
nkululeko/feat_extract/feats_analyser.py,sha256=eW0v7Boybfj2gXi77MPjaLyHUQ1C42mx9hgoQeDwNac,12999
|
55
55
|
nkululeko/feat_extract/feats_ast.py,sha256=ycJn5eSVOxcEpmeHVk0FPB8q5XiTC8VSKz61L9n0Wa4,4638
|
56
56
|
nkululeko/feat_extract/feats_auddim.py,sha256=ulP_o4SGeQDFTs8YYCGKgccARAo6-wcjPK6-hhGjmn8,3155
|
57
57
|
nkululeko/feat_extract/feats_audmodel.py,sha256=aRGTBDKdYaTT_9xDaFZqpuyPhzxSNN_3b1PJDUHtYW4,3180
|
@@ -88,7 +88,7 @@ nkululeko/models/model_mlp.py,sha256=CaR0PCRBcdCo_hhC5r9Q6IbVIApvtoRVrUdZsgzbx1M
|
|
88
88
|
nkululeko/models/model_mlp_regression.py,sha256=YMHMWRlWL6iL8HdYe6rTAoAW6GwHBx3PDvysCZYj5tQ,10186
|
89
89
|
nkululeko/models/model_svm.py,sha256=AzWksBRbIdpUuMbDnAh_YAXebewR5POj9AkB9VC40pI,1010
|
90
90
|
nkululeko/models/model_svr.py,sha256=_YZeksqB3eBENGlg3g9RwYFlk9rQQ-XCeNBKLlGGVoE,725
|
91
|
-
nkululeko/models/model_tree.py,sha256=
|
91
|
+
nkululeko/models/model_tree.py,sha256=KScDTGgkOePTZEcereB7bxQ47wIKhYI-xhTKCU4cKDk,454
|
92
92
|
nkululeko/models/model_tree_reg.py,sha256=IgQcPTE-304HQLYSKPF8Z4ot_Ur9dH01fZjS0nXke_M,428
|
93
93
|
nkululeko/models/model_tuned.py,sha256=vmNBkqvEH-4nnhY1REXDA9kA4vpZJzeRmGJFq7E3bLM,21340
|
94
94
|
nkululeko/models/model_xgb.py,sha256=Thgx5ESdIok4v72mKh4plxpo4smGcKALWNCJTDScY0M,447
|
@@ -98,7 +98,7 @@ nkululeko/reporting/defines.py,sha256=IsY1YgKRMaABpylVKjBJgJ5bNCEbGCVA_E6pivraqS
|
|
98
98
|
nkululeko/reporting/latex_writer.py,sha256=qiCRSmB4KOD_za4oHu5x-PhwjZohzfo8wecMOwlXZwc,1886
|
99
99
|
nkululeko/reporting/report.py,sha256=W0rcigDdjBvxZQ3pZja_gvToILYvaZ1BFtnN2qFRfYI,1060
|
100
100
|
nkululeko/reporting/report_item.py,sha256=siWeGNgo4bAE46YBMNcsdf3jTMTy76BO9Fi6DTvDig4,533
|
101
|
-
nkululeko/reporting/reporter.py,sha256=
|
101
|
+
nkululeko/reporting/reporter.py,sha256=xFyGj6gQ8T1WB3w3tJ0awlgQcq1e3IKXEIfl_DvOngg,19996
|
102
102
|
nkululeko/reporting/result.py,sha256=G63a2tHCwHhM6NBJgYzsWKWJm4Yu3r4hsCHA2Km7eHU,1073
|
103
103
|
nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
104
104
|
nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=pmLHuXsaqvcdYxB4PSW9l1mbQWZZBJFhi_CGabqydas,1947
|
@@ -106,9 +106,9 @@ nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5Ev
|
|
106
106
|
nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
107
107
|
nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
|
108
108
|
nkululeko/utils/stats.py,sha256=eC9dMO-by6CDnGLHDBQu-2B4-BudZNJ0nnWGhKYdUMA,2968
|
109
|
-
nkululeko/utils/util.py,sha256=
|
110
|
-
nkululeko-0.
|
111
|
-
nkululeko-0.
|
112
|
-
nkululeko-0.
|
113
|
-
nkululeko-0.
|
114
|
-
nkululeko-0.
|
109
|
+
nkululeko/utils/util.py,sha256=363Lgmcg6fPKCGbroX0DDyW_zcYNx-Ayqv67qdpfYcw,16710
|
110
|
+
nkululeko-0.89.1.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
111
|
+
nkululeko-0.89.1.dist-info/METADATA,sha256=AuVssWNRMXlseH5xSzcls--AAYLFSeEbFtHbAFT2o_o,40667
|
112
|
+
nkululeko-0.89.1.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
|
113
|
+
nkululeko-0.89.1.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
114
|
+
nkululeko-0.89.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|