nkululeko 0.86.4__py3-none-any.whl → 0.86.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +1 -1
- nkululeko/data/dataset_csv.py +14 -0
- nkululeko/experiment.py +2 -2
- nkululeko/reporting/reporter.py +9 -1
- {nkululeko-0.86.4.dist-info → nkululeko-0.86.5.dist-info}/METADATA +15 -3
- {nkululeko-0.86.4.dist-info → nkululeko-0.86.5.dist-info}/RECORD +9 -9
- {nkululeko-0.86.4.dist-info → nkululeko-0.86.5.dist-info}/LICENSE +0 -0
- {nkululeko-0.86.4.dist-info → nkululeko-0.86.5.dist-info}/WHEEL +0 -0
- {nkululeko-0.86.4.dist-info → nkululeko-0.86.5.dist-info}/top_level.txt +0 -0
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.86.4"
|
1
|
+
VERSION="0.86.5"
|
2
2
|
SAMPLING_RATE = 16000
|
nkululeko/data/dataset_csv.py
CHANGED
@@ -59,6 +59,20 @@ class Dataset_CSV(Dataset):
|
|
59
59
|
lambda x: root + "/" + audio_path + "/" + x
|
60
60
|
)
|
61
61
|
)
|
62
|
+
else: # absolute path is True
|
63
|
+
if audformat.index_type(df.index) == "segmented":
|
64
|
+
file_index = (
|
65
|
+
df.index.levels[0]
|
66
|
+
.map(lambda x: audio_path + "/" + x)
|
67
|
+
.values
|
68
|
+
)
|
69
|
+
df = df.set_index(df.index.set_levels(
|
70
|
+
file_index, level="file"))
|
71
|
+
else:
|
72
|
+
if not isinstance(df, pd.DataFrame):
|
73
|
+
df = pd.DataFrame(df)
|
74
|
+
df = df.set_index(df.index.to_series().apply(
|
75
|
+
lambda x: audio_path + "/" + x ))
|
62
76
|
|
63
77
|
self.df = df
|
64
78
|
self.db = None
|
nkululeko/experiment.py
CHANGED
@@ -112,11 +112,11 @@ class Experiment:
|
|
112
112
|
auto_labels = list(next(iter(self.datasets.values())).df[self.target].unique())
|
113
113
|
if labels:
|
114
114
|
self.labels = ast.literal_eval(labels)
|
115
|
-
self.util.debug(f"
|
115
|
+
self.util.debug(f"Using target labels (from config): {labels}")
|
116
116
|
else:
|
117
117
|
self.labels = auto_labels
|
118
118
|
# print autolabel no matter it is specified or not
|
119
|
-
self.util.debug(f"
|
119
|
+
self.util.debug(f"Labels (from database): {auto_labels}")
|
120
120
|
glob_conf.set_labels(self.labels)
|
121
121
|
self.util.debug(f"loaded databases {dbs}")
|
122
122
|
|
nkululeko/reporting/reporter.py
CHANGED
@@ -7,10 +7,11 @@ from confidence_intervals import evaluate_with_conf_int
|
|
7
7
|
import matplotlib.pyplot as plt
|
8
8
|
import numpy as np
|
9
9
|
from scipy.stats import pearsonr
|
10
|
-
from sklearn.metrics import ConfusionMatrixDisplay
|
10
|
+
from sklearn.metrics import ConfusionMatrixDisplay, roc_curve
|
11
11
|
from sklearn.metrics import classification_report
|
12
12
|
from sklearn.metrics import confusion_matrix
|
13
13
|
from sklearn.metrics import r2_score
|
14
|
+
from sklearn.metrics import roc_curve, auc, roc_auc_score
|
14
15
|
from torch import is_tensor
|
15
16
|
|
16
17
|
from audmetric import accuracy
|
@@ -262,8 +263,15 @@ class Reporter:
|
|
262
263
|
c_ress[i] = float(f"{c_res:.3f}")
|
263
264
|
self.util.debug(f"labels: {labels}")
|
264
265
|
f1_per_class = f"result per class (F1 score): {c_ress}"
|
266
|
+
if len(np.unique(self.truths)) == 2:
|
267
|
+
fpr, tpr, _ = roc_curve(self.truths, self.preds)
|
268
|
+
auc_score = auc(fpr, tpr)
|
269
|
+
pauc_score = roc_auc_score(self.truths, self.preds, max_fpr=0.1)
|
270
|
+
auc_pauc = f"auc: {auc_score:.3f}, pauc: {pauc_score:.3f}"
|
271
|
+
self.util.debug(auc_pauc)
|
265
272
|
self.util.debug(f1_per_class)
|
266
273
|
rpt_str = f"{json.dumps(rpt)}\n{f1_per_class}"
|
274
|
+
# rpt_str += f"\n{auc_pauc}"
|
267
275
|
text_file.write(rpt_str)
|
268
276
|
glob_conf.report.add_item(
|
269
277
|
ReportItem(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.86.4
|
3
|
+
Version: 0.86.5
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -58,6 +58,8 @@ Requires-Dist: pylatex
|
|
58
58
|
- [Hello World example](#hello-world-example)
|
59
59
|
- [Features](#features)
|
60
60
|
- [License](#license)
|
61
|
+
- [Contributing](#contributing)
|
62
|
+
- [Citing](#citing)
|
61
63
|
|
62
64
|
|
63
65
|
## Overview
|
@@ -65,7 +67,7 @@ A project to detect speaker characteristics by machine learning experiments with
|
|
65
67
|
|
66
68
|
The idea is to have a framework (based on e.g. sklearn and torch) that can be used to rapidly and automatically analyse audio data and explore machine learning models based on that data.
|
67
69
|
|
68
|
-
* NEW
|
70
|
+
* NEW with nkululeko: [Finetune transformer-models](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
|
69
71
|
* The latest features can be seen in [the ini-file](./ini_file.md) options that are used to control Nkululeko
|
70
72
|
* Below is a [Hello World example](#helloworld) that should get you set up quickly, also on [Google Colab](https://colab.research.google.com/drive/1GYNBd5cdZQ1QC3Jm58qoeMaJg3UuPhjw?usp=sharing#scrollTo=4G_SjuF9xeQf), and [with Kaggle](https://www.kaggle.com/felixburk/nkululeko-hello-world-example)
|
71
73
|
* [Here's a blog post on how to set up nkululeko on your computer.](http://blog.syntheticspeech.de/2021/08/30/how-to-set-up-your-first-nkululeko-project/)
|
@@ -249,7 +251,7 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
|
|
249
251
|
* [Predict new labels for your data from public models and check bias](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/)
|
250
252
|
* [Resample](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/)
|
251
253
|
* [Get some statistics on correlation and effect-size](http://blog.syntheticspeech.de/2023/09/05/nkululeko-get-some-statistics-on-correlation-and-effect-size/)
|
252
|
-
* [
|
254
|
+
* [Automatic generation of a latex / pdf report](http://blog.syntheticspeech.de/2023/09/26/nkululeko-generate-a-latex-pdf-report/)
|
253
255
|
* [Inspect your data with Spotlight](http://blog.syntheticspeech.de/2023/10/31/nkululeko-inspect-your-data-with-spotlight/)
|
254
256
|
* [Automatically stratify your split sets](http://blog.syntheticspeech.de/2023/11/07/nkululeko-automatically-stratify-your-split-sets/)
|
255
257
|
* [re-name data column names](http://blog.syntheticspeech.de/2023/11/16/nkululeko-re-name-data-column-names/)
|
@@ -314,6 +316,12 @@ Here's [an animation that shows the progress of classification done with nkulule
|
|
314
316
|
|
315
317
|
## License
|
316
318
|
Nkululeko can be used under the [MIT license](https://choosealicense.com/licenses/mit/)
|
319
|
+
|
320
|
+
|
321
|
+
## Contributing
|
322
|
+
Contributions are welcome and encouraged. To learn more about how to contribute to nkululeko please refer to the [Contributing guidelines](./CONTRIBUTING.md)
|
323
|
+
|
324
|
+
## Citing
|
317
325
|
If you use it, please mention the Nkululeko paper
|
318
326
|
|
319
327
|
F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schuller: Nkululeko: A Tool For Rapid Speaker Characteristics Detection, Proc. LREC, 2022
|
@@ -335,6 +343,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
|
|
335
343
|
Changelog
|
336
344
|
=========
|
337
345
|
|
346
|
+
Version 0.86.5
|
347
|
+
--------------
|
348
|
+
* fix audio path detection in data csv import
|
349
|
+
|
338
350
|
Version 0.86.4
|
339
351
|
--------------
|
340
352
|
* add finetuning to the demo module
|
@@ -2,11 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
|
2
2
|
nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
|
3
3
|
nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
|
4
4
|
nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
|
5
|
-
nkululeko/constants.py,sha256=
|
5
|
+
nkululeko/constants.py,sha256=ctptCGup_HGCOxioUojLqMivtVfYq8CZDLHJprDr9aE,39
|
6
6
|
nkululeko/demo.py,sha256=WSKr-W5uJ9DQfemK923g7Hd5V3kgAn03Er0JX1Pa45I,5142
|
7
7
|
nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
|
8
8
|
nkululeko/demo_predictor.py,sha256=es56xbT8ifkS_vnrlb5NTZT54gNmeUtNlA4zVA_gnN8,4757
|
9
|
-
nkululeko/experiment.py,sha256=
|
9
|
+
nkululeko/experiment.py,sha256=5nF-eDf8OCp6KRIU7KnryWL5SLJQUtr2BueHhEdcKw0,31040
|
10
10
|
nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
|
11
11
|
nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
|
12
12
|
nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzwM,3976
|
@@ -46,7 +46,7 @@ nkululeko/autopredict/ap_valence.py,sha256=n-hctRKySzhmJtowuMOTUu0T_ld3uK5pnfOzW
|
|
46
46
|
nkululeko/autopredict/estimate_snr.py,sha256=S-bpS0xFkwWc4Ch75UrjbS8y538lQ0U3g_iLRFXureY,5048
|
47
47
|
nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
48
48
|
nkululeko/data/dataset.py,sha256=JGzMD6HIvkFkYBekmbmslIKc5ADaCj06T-8gpqH_kFo,27650
|
49
|
-
nkululeko/data/dataset_csv.py,sha256=
|
49
|
+
nkululeko/data/dataset_csv.py,sha256=dzOrbKB8t0UATAIYaKAOqHTogmYPBqskt6Hak7VjbSM,4537
|
50
50
|
nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
51
51
|
nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
|
52
52
|
nkululeko/feat_extract/feats_agender_agender.py,sha256=tgH2BnwcxpvuLmOkrMbVdBSX0Onfz2MG12FsddalRKI,3424
|
@@ -96,7 +96,7 @@ nkululeko/reporting/defines.py,sha256=IsY1YgKRMaABpylVKjBJgJ5bNCEbGCVA_E6pivraqS
|
|
96
96
|
nkululeko/reporting/latex_writer.py,sha256=qiCRSmB4KOD_za4oHu5x-PhwjZohzfo8wecMOwlXZwc,1886
|
97
97
|
nkululeko/reporting/report.py,sha256=W0rcigDdjBvxZQ3pZja_gvToILYvaZ1BFtnN2qFRfYI,1060
|
98
98
|
nkululeko/reporting/report_item.py,sha256=siWeGNgo4bAE46YBMNcsdf3jTMTy76BO9Fi6DTvDig4,533
|
99
|
-
nkululeko/reporting/reporter.py,sha256=
|
99
|
+
nkululeko/reporting/reporter.py,sha256=II3QyeneAv8xQDBZ-qE_GJL8_WV_yXqLwBUYqrjqwPo,13938
|
100
100
|
nkululeko/reporting/result.py,sha256=nSN5or-Py2GPRWHkWpGRh7UCi1W0er7WLEHz8fYLk-A,742
|
101
101
|
nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
102
102
|
nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=pmLHuXsaqvcdYxB4PSW9l1mbQWZZBJFhi_CGabqydas,1947
|
@@ -105,8 +105,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
105
105
|
nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
|
106
106
|
nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
|
107
107
|
nkululeko/utils/util.py,sha256=ILpfNuaeq-hy1bUkRhVrzO2wG9z9Upaozs9EBoIaMG0,14123
|
108
|
-
nkululeko-0.86.4.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
109
|
-
nkululeko-0.86.
|
110
|
-
nkululeko-0.86.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
111
|
-
nkululeko-0.86.4.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
112
|
-
nkululeko-0.86.4.dist-info/RECORD,,
|
108
|
+
nkululeko-0.86.5.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
109
|
+
nkululeko-0.86.5.dist-info/METADATA,sha256=HrTVTfGh3KDsmyBFijAp5tMINdiBvHhsC8E0_YwBjwE,37848
|
110
|
+
nkululeko-0.86.5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
111
|
+
nkululeko-0.86.5.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
112
|
+
nkululeko-0.86.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|