nkululeko 0.89.2__py3-none-any.whl → 0.90.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/aug_train.py +6 -4
- nkululeko/augment.py +8 -6
- nkululeko/augmenting/augmenter.py +4 -4
- nkululeko/augmenting/randomsplicer.py +12 -9
- nkululeko/augmenting/randomsplicing.py +2 -3
- nkululeko/augmenting/resampler.py +9 -6
- nkululeko/autopredict/ap_age.py +4 -2
- nkululeko/autopredict/ap_arousal.py +4 -2
- nkululeko/autopredict/ap_dominance.py +3 -2
- nkululeko/autopredict/ap_gender.py +4 -2
- nkululeko/autopredict/ap_mos.py +5 -2
- nkululeko/autopredict/ap_pesq.py +5 -2
- nkululeko/autopredict/ap_sdr.py +5 -2
- nkululeko/autopredict/ap_snr.py +5 -2
- nkululeko/autopredict/ap_stoi.py +5 -2
- nkululeko/autopredict/ap_valence.py +4 -2
- nkululeko/autopredict/estimate_snr.py +10 -14
- nkululeko/cacheddataset.py +1 -1
- nkululeko/constants.py +1 -1
- nkululeko/data/dataset.py +19 -16
- nkululeko/data/dataset_csv.py +5 -3
- nkululeko/demo-ft.py +29 -0
- nkululeko/demo_feats.py +5 -4
- nkululeko/demo_predictor.py +3 -4
- nkululeko/ensemble.py +27 -28
- nkululeko/experiment.py +11 -7
- nkululeko/experiment_felix.py +728 -0
- nkululeko/explore.py +1 -0
- nkululeko/export.py +7 -5
- nkululeko/feat_extract/feats_agender.py +5 -4
- nkululeko/feat_extract/feats_agender_agender.py +7 -6
- nkululeko/feat_extract/feats_analyser.py +18 -16
- nkululeko/feat_extract/feats_ast.py +9 -8
- nkululeko/feat_extract/feats_auddim.py +3 -5
- nkululeko/feat_extract/feats_audmodel.py +2 -2
- nkululeko/feat_extract/feats_clap.py +9 -12
- nkululeko/feat_extract/feats_hubert.py +2 -3
- nkululeko/feat_extract/feats_import.py +5 -4
- nkululeko/feat_extract/feats_mld.py +3 -5
- nkululeko/feat_extract/feats_mos.py +4 -3
- nkululeko/feat_extract/feats_opensmile.py +4 -3
- nkululeko/feat_extract/feats_oxbow.py +5 -4
- nkululeko/feat_extract/feats_praat.py +4 -7
- nkululeko/feat_extract/feats_snr.py +3 -5
- nkululeko/feat_extract/feats_spectra.py +8 -9
- nkululeko/feat_extract/feats_spkrec.py +6 -11
- nkululeko/feat_extract/feats_squim.py +2 -4
- nkululeko/feat_extract/feats_trill.py +2 -5
- nkululeko/feat_extract/feats_wav2vec2.py +8 -4
- nkululeko/feat_extract/feats_wavlm.py +2 -3
- nkululeko/feat_extract/feats_whisper.py +4 -6
- nkululeko/feat_extract/featureset.py +4 -2
- nkululeko/feat_extract/feinberg_praat.py +1 -3
- nkululeko/feat_extract/transformer_feature_extractor.py +147 -0
- nkululeko/file_checker.py +3 -3
- nkululeko/filter_data.py +3 -1
- nkululeko/fixedsegment.py +83 -0
- nkululeko/models/model.py +3 -5
- nkululeko/models/model_bayes.py +1 -0
- nkululeko/models/model_cnn.py +4 -6
- nkululeko/models/model_gmm.py +13 -9
- nkululeko/models/model_knn.py +1 -0
- nkululeko/models/model_knn_reg.py +1 -0
- nkululeko/models/model_lin_reg.py +1 -0
- nkululeko/models/model_mlp.py +2 -3
- nkululeko/models/model_mlp_regression.py +1 -6
- nkululeko/models/model_svm.py +2 -2
- nkululeko/models/model_svr.py +1 -0
- nkululeko/models/model_tree.py +2 -3
- nkululeko/models/model_tree_reg.py +1 -0
- nkululeko/models/model_tuned.py +54 -33
- nkululeko/models/model_xgb.py +1 -0
- nkululeko/models/model_xgr.py +1 -0
- nkululeko/multidb.py +1 -0
- nkululeko/nkululeko.py +1 -1
- nkululeko/plots.py +1 -1
- nkululeko/predict.py +4 -5
- nkululeko/reporting/defines.py +6 -8
- nkululeko/reporting/latex_writer.py +3 -3
- nkululeko/reporting/report.py +2 -2
- nkululeko/reporting/report_item.py +1 -0
- nkululeko/reporting/reporter.py +20 -19
- nkululeko/resample.py +8 -12
- nkululeko/resample_cli.py +99 -0
- nkululeko/runmanager.py +3 -1
- nkululeko/scaler.py +1 -1
- nkululeko/segment.py +6 -5
- nkululeko/segmenting/seg_inaspeechsegmenter.py +3 -3
- nkululeko/segmenting/seg_silero.py +4 -4
- nkululeko/syllable_nuclei.py +9 -22
- nkululeko/test_pretrain.py +6 -7
- nkululeko/utils/stats.py +0 -1
- nkululeko/utils/util.py +2 -3
- {nkululeko-0.89.2.dist-info → nkululeko-0.90.1.dist-info}/METADATA +12 -2
- nkululeko-0.90.1.dist-info/RECORD +119 -0
- {nkululeko-0.89.2.dist-info → nkululeko-0.90.1.dist-info}/WHEEL +1 -1
- nkululeko-0.89.2.dist-info/RECORD +0 -114
- {nkululeko-0.89.2.dist-info → nkululeko-0.90.1.dist-info}/LICENSE +0 -0
- {nkululeko-0.89.2.dist-info → nkululeko-0.90.1.dist-info}/top_level.txt +0 -0
nkululeko/ensemble.py
CHANGED
@@ -15,28 +15,20 @@ Raises:
     ValueError: If an unknown ensemble method is provided.
     AssertionError: If the number of config files is less than 2 for majority voting.
 """
+
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-


-from typing import List
 import configparser
 import time
 from argparse import ArgumentParser
 from pathlib import Path
+from typing import List

 import numpy as np
 import pandas as pd
-
-
-from sklearn.metrics import (
-    RocCurveDisplay,
-    balanced_accuracy_score,
-    classification_report,
-    auc,
-    roc_auc_score,
-    roc_curve
-)
+from sklearn.metrics import balanced_accuracy_score, classification_report

 from nkululeko.constants import VERSION
 from nkululeko.experiment import Experiment
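The import block above now brings in only the two sklearn metrics the module still uses. As a rough standalone illustration (not code from the package), this is the kind of evaluation those two imports support; the label lists below are made up:

    from sklearn.metrics import balanced_accuracy_score, classification_report

    # Hypothetical ground truth and ensembled predictions.
    y_true = ["happy", "sad", "happy", "neutral"]
    y_pred = ["happy", "sad", "neutral", "neutral"]

    print(balanced_accuracy_score(y_true, y_pred))  # recall averaged over classes
    print(classification_report(y_true, y_pred))    # per-class precision/recall/F1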
@@ -169,17 +161,19 @@ def performance_weighted_ensemble(ensemble_preds_ls, labels, weights):

     # asserts weiths in decimal 0-1
     assert all(0 <= w <= 1 for w in weights), "Weights must be between 0 and 1"
-
+
     # assert lenght of weights matches number of models
-    assert len(weights) == len(
-        ensemble_preds_ls), "Number of weights must match number of models"
+    assert len(weights) == len(
+        ensemble_preds_ls
+    ), "Number of weights must match number of models"
+
     # Normalize weights
     total_weight = sum(weights)
     weights = [weight / total_weight for weight in weights]
-
+
     for idx in ensemble_preds_ls[0].index:
         class_probabilities = {label: 0 for label in labels}
-
+
         for df, weight in zip(ensemble_preds_ls, weights):
             row = df.loc[idx]
             for label in labels:
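The hunk above only reflows the weighting code, but the logic it touches is worth spelling out: weights are validated, normalized to sum to one, and then used to accumulate weighted class probabilities per sample. A minimal self-contained sketch of that idea, with invented per-model probability frames:

    import pandas as pd

    # Hypothetical per-model class probabilities (same index, one column per label).
    labels = ["happy", "sad"]
    model_a = pd.DataFrame({"happy": [0.9, 0.2], "sad": [0.1, 0.8]})
    model_b = pd.DataFrame({"happy": [0.6, 0.4], "sad": [0.4, 0.6]})
    ensemble_preds_ls = [model_a, model_b]

    weights = [0.7, 0.3]  # e.g. per-model validation performance
    total_weight = sum(weights)
    weights = [w / total_weight for w in weights]  # normalize to sum to 1

    predictions = []
    for idx in ensemble_preds_ls[0].index:
        class_probabilities = {label: 0.0 for label in labels}
        for df, weight in zip(ensemble_preds_ls, weights):
            row = df.loc[idx]
            for label in labels:
                class_probabilities[label] += row[label] * weight
        predictions.append(max(class_probabilities, key=class_probabilities.get))

    print(predictions)  # ['happy', 'sad']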
@@ -192,10 +186,12 @@ def ensemble_predictions(
     return final_predictions, final_confidences


-
-
 def ensemble_predictions(
-    config_files: List[str], method: str, threshold: float, weights: List[float], no_labels: bool
+    config_files: List[str],
+    method: str,
+    threshold: float,
+    weights: List[float],
+    no_labels: bool,
 ) -> pd.DataFrame:
     """
     Ensemble predictions from multiple experiments.
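With the parameters now listed one per line, calls to ensemble_predictions read naturally with keyword arguments. A hypothetical invocation (the config file names, weights, and threshold are placeholders, and it assumes the referenced experiments have already produced their predictions):

    from nkululeko.ensemble import ensemble_predictions

    ensemble_df = ensemble_predictions(
        config_files=["exp_os_svm.ini", "exp_w2v2_xgb.ini"],
        method="performance_weighted",
        threshold=1.0,
        weights=[0.7, 0.3],
        no_labels=False,
    )
    print(ensemble_df.head())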
@@ -261,17 +257,20 @@ def ensemble_predictions(
             ensemble_preds_ls, labels, threshold
         )
     elif method == "uncertainty_weighted":
-        ensemble_preds["predicted"], ensemble_preds["uncertainty"] = (
-            uncertainty_weighted_ensemble(ensemble_preds_ls, labels)
-        )
+        (
+            ensemble_preds["predicted"],
+            ensemble_preds["uncertainty"],
+        ) = uncertainty_weighted_ensemble(ensemble_preds_ls, labels)
     elif method == "confidence_weighted":
-        ensemble_preds["predicted"], ensemble_preds["confidence"] = (
-            confidence_weighted_ensemble(ensemble_preds_ls, labels)
-        )
+        (
+            ensemble_preds["predicted"],
+            ensemble_preds["confidence"],
+        ) = confidence_weighted_ensemble(ensemble_preds_ls, labels)
     elif method == "performance_weighted":
-        ensemble_preds["predicted"], ensemble_preds["confidence"] = (
-            performance_weighted_ensemble(ensemble_preds_ls, labels, weights)
-        )
+        (
+            ensemble_preds["predicted"],
+            ensemble_preds["confidence"],
+        ) = performance_weighted_ensemble(ensemble_preds_ls, labels, weights)
     else:
         raise ValueError(f"Unknown ensemble method: {method}")

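The three *_weighted branches above are reformatted rather than rewritten: the same two-element unpacking, with the assignment target now wrapped in parentheses and split across lines. A tiny standalone illustration with a dummy combiner function:

    def combiner():
        # Stand-in for e.g. uncertainty_weighted_ensemble(...)
        return ["happy", "sad"], [0.12, 0.30]

    preds = {}

    # One-line spelling of the unpacking.
    preds["predicted"], preds["uncertainty"] = combiner()

    # Reflowed spelling used in 0.90.1; behaviour is identical.
    (
        preds["predicted"],
        preds["uncertainty"],
    ) = combiner()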
nkululeko/experiment.py
CHANGED
@@ -5,13 +5,13 @@ import pickle
 import random
 import time

+import audeer
+import audformat
 import numpy as np
 import pandas as pd
 from sklearn.preprocessing import LabelEncoder

-import audeer
-import audformat
-
+import nkululeko.glob_conf as glob_conf
 from nkululeko.data.dataset import Dataset
 from nkululeko.data.dataset_csv import Dataset_CSV
 from nkululeko.demo_predictor import Demo_predictor
@@ -19,7 +19,6 @@ from nkululeko.feat_extract.feats_analyser import FeatureAnalyser
 from nkululeko.feature_extractor import FeatureExtractor
 from nkululeko.file_checker import FileChecker
 from nkululeko.filter_data import DataFilter
-import nkululeko.glob_conf as glob_conf
 from nkululeko.plots import Plots
 from nkululeko.reporting.report import Report
 from nkululeko.runmanager import Runmanager
@@ -101,12 +100,15 @@ class Experiment:
             if data.got_speaker:
                 self.got_speaker = True
             self.datasets.update({d: data})
-        self.target = self.util.config_val("DATA", "target", "emotion")
+        self.target = self.util.config_val("DATA", "target", "none")
         glob_conf.set_target(self.target)
         # print target via debug
         self.util.debug(f"target: {self.target}")
         # print keys/column
         dbs = ",".join(list(self.datasets.keys()))
+        if self.target == "none":
+            self.util.debug(f"loaded databases {dbs}")
+            return
         labels = self.util.config_val("DATA", "labels", False)
         auto_labels = list(next(iter(self.datasets.values())).df[self.target].unique())
         if labels:
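The new default and the early return mean a configuration without a [DATA] target entry now just loads and reports its databases instead of failing during label preparation. A minimal sketch of the fallback mechanics using plain configparser (nkululeko's util.config_val is assumed to behave like this default lookup; the database name is invented):

    import configparser

    # Hypothetical minimal config with no "target" key in [DATA].
    config = configparser.ConfigParser()
    config.read_string("[DATA]\ndatabases = ['mydata']\n")

    target = config.get("DATA", "target", fallback="none")
    if target == "none":
        # Mirrors the new behaviour: report loaded databases, skip label handling.
        print("loaded databases without a prediction target")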
@@ -191,7 +193,8 @@ class Experiment:
         self.df_train, self.df_test = pd.DataFrame(), pd.DataFrame()
         for d in self.datasets.values():
             d.split()
-            d.prepare_labels()
+            if self.target != "none":
+                d.prepare_labels()
             if d.df_train.shape[0] == 0:
                 self.util.debug(f"warn: {d.name} train empty")
             self.df_train = pd.concat([self.df_train, d.df_train])
@@ -207,6 +210,8 @@ class Experiment:
             self.df_test.to_csv(storage_test)
             self.df_train.to_csv(storage_train)

+        if self.target == "none":
+            return
         self.util.copy_flags(self, self.df_test)
         self.util.copy_flags(self, self.df_train)
         # Try data checks
@@ -738,7 +743,6 @@ class Experiment:
         if model.is_ann():
             print("converting to onnx from torch")
         else:
-
             print("converting to onnx from sklearn")
         # save the rest
         f = open(filename, "wb")