nkululeko 0.86.0__py3-none-any.whl → 0.86.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +1 -1
- nkululeko/experiment.py +3 -2
- nkululeko/models/model_tuned.py +11 -3
- nkululeko/resample.py +32 -6
- nkululeko/utils/util.py +6 -0
- {nkululeko-0.86.0.dist-info → nkululeko-0.86.1.dist-info}/METADATA +7 -1
- {nkululeko-0.86.0.dist-info → nkululeko-0.86.1.dist-info}/RECORD +10 -10
- {nkululeko-0.86.0.dist-info → nkululeko-0.86.1.dist-info}/LICENSE +0 -0
- {nkululeko-0.86.0.dist-info → nkululeko-0.86.1.dist-info}/WHEEL +0 -0
- {nkululeko-0.86.0.dist-info → nkululeko-0.86.1.dist-info}/top_level.txt +0 -0
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.86.0"
|
1
|
+
VERSION="0.86.1"
|
2
2
|
SAMPLING_RATE = 16000
|
nkululeko/experiment.py
CHANGED
@@ -72,8 +72,9 @@ class Experiment:
|
|
72
72
|
if self.util.config_val("REPORT", "latex", False):
|
73
73
|
self.report.export_latex()
|
74
74
|
|
75
|
-
|
76
|
-
|
75
|
+
# moved to util
|
76
|
+
# def get_name(self):
|
77
|
+
# return self.util.get_exp_name()
|
77
78
|
|
78
79
|
def set_globals(self, config_obj):
|
79
80
|
"""install a config object in the global space"""
|
nkululeko/models/model_tuned.py
CHANGED
@@ -64,7 +64,8 @@ class TunedModel(BaseModel):
|
|
64
64
|
|
65
65
|
def _init_model(self):
|
66
66
|
model_path = "facebook/wav2vec2-large-robust-ft-swbd-300h"
|
67
|
-
pretrained_model = self.util.config_val("MODEL", "pretrained_model", model_path)
|
67
|
+
pretrained_model = self.util.config_val(
|
68
|
+
"MODEL", "pretrained_model", model_path)
|
68
69
|
self.num_layers = None
|
69
70
|
self.sampling_rate = 16000
|
70
71
|
self.max_duration_sec = 8.0
|
@@ -95,6 +96,7 @@ class TunedModel(BaseModel):
|
|
95
96
|
|
96
97
|
# load pre-trained model
|
97
98
|
if self.is_classifier:
|
99
|
+
self.util.debug(f"Task is classification.")
|
98
100
|
le = glob_conf.label_encoder
|
99
101
|
mapping = dict(zip(le.classes_, range(len(le.classes_))))
|
100
102
|
target_mapping = {k: int(v) for k, v in mapping.items()}
|
@@ -102,15 +104,16 @@ class TunedModel(BaseModel):
|
|
102
104
|
value: key for key, value in target_mapping.items()
|
103
105
|
}
|
104
106
|
self.config = transformers.AutoConfig.from_pretrained(
|
105
|
-
|
107
|
+
pretrained_model,
|
106
108
|
num_labels=len(target_mapping),
|
107
109
|
label2id=target_mapping,
|
108
110
|
id2label=target_mapping_reverse,
|
109
111
|
finetuning_task=target_name,
|
110
112
|
)
|
111
113
|
else:
|
114
|
+
self.util.debug(f"Task is regression.")
|
112
115
|
self.config = transformers.AutoConfig.from_pretrained(
|
113
|
-
|
116
|
+
pretrained_model,
|
114
117
|
num_labels=1,
|
115
118
|
finetuning_task=target_name,
|
116
119
|
)
|
@@ -253,6 +256,9 @@ class TunedModel(BaseModel):
|
|
253
256
|
else:
|
254
257
|
criterion = ConcordanceCorCoeff()
|
255
258
|
|
259
|
+
# set push_to_hub value, default false
|
260
|
+
push = self.util.config_val("MODEL", "push_to_hub", False)
|
261
|
+
|
256
262
|
class Trainer(transformers.Trainer):
|
257
263
|
def compute_loss(
|
258
264
|
self,
|
@@ -299,6 +305,8 @@ class TunedModel(BaseModel):
|
|
299
305
|
load_best_model_at_end=True,
|
300
306
|
remove_unused_columns=False,
|
301
307
|
report_to="none",
|
308
|
+
push_to_hub=push,
|
309
|
+
hub_model_id=f"{self.util.get_name()}",
|
302
310
|
)
|
303
311
|
|
304
312
|
trainer = Trainer(
|
nkululeko/resample.py
CHANGED
@@ -11,22 +11,32 @@ from nkululeko.utils.util import Util
|
|
11
11
|
|
12
12
|
from nkululeko.constants import VERSION
|
13
13
|
from nkululeko.experiment import Experiment
|
14
|
+
from nkululeko.utils.files import find_files
|
14
15
|
|
15
16
|
|
16
17
|
def main(src_dir):
|
17
18
|
parser = argparse.ArgumentParser(
|
18
|
-
description="Call the nkululeko RESAMPLE framework.")
|
19
|
+
description="Call the nkululeko RESAMPLE framework."
|
20
|
+
)
|
19
21
|
parser.add_argument("--config", default=None,
|
20
22
|
help="The base configuration")
|
21
23
|
parser.add_argument("--file", default=None,
|
22
24
|
help="The input audio file to resample")
|
23
|
-
parser.add_argument(
|
24
|
-
|
25
|
+
parser.add_argument(
|
26
|
+
"--folder",
|
27
|
+
default=None,
|
28
|
+
help="The input directory containing audio files and subdirectories to resample",
|
29
|
+
)
|
30
|
+
parser.add_argument(
|
31
|
+
"--replace", action="store_true", help="Replace the original audio file"
|
32
|
+
)
|
25
33
|
|
26
34
|
args = parser.parse_args()
|
27
35
|
|
28
|
-
if args.file is None and args.config is None:
|
29
|
-
print(
|
36
|
+
if args.file is None and args.folder is None and args.config is None:
|
37
|
+
print(
|
38
|
+
"ERROR: Either --file, --folder, or --config argument must be provided."
|
39
|
+
)
|
30
40
|
exit()
|
31
41
|
|
32
42
|
if args.file is not None:
|
@@ -42,6 +52,20 @@ def main(src_dir):
|
|
42
52
|
util.debug(f"Resampling audio file: {args.file}")
|
43
53
|
rs = Resampler(df_sample, not_testing=True, replace=args.replace)
|
44
54
|
rs.resample()
|
55
|
+
elif args.folder is not None:
|
56
|
+
# Load all audio files in the directory and its subdirectories into a DataFrame
|
57
|
+
files = find_files(args.folder, relative=True, ext=["wav"])
|
58
|
+
files = pd.Series(files)
|
59
|
+
df_sample = pd.DataFrame(index=files)
|
60
|
+
df_sample.index = audformat.utils.to_segmented_index(
|
61
|
+
df_sample.index, allow_nat=False
|
62
|
+
)
|
63
|
+
|
64
|
+
# Resample the audio files
|
65
|
+
util = Util("resampler", has_config=False)
|
66
|
+
util.debug(f"Resampling audio files in directory: {args.folder}")
|
67
|
+
rs = Resampler(df_sample, not_testing=True, replace=args.replace)
|
68
|
+
rs.resample()
|
45
69
|
else:
|
46
70
|
# Existing code for handling INI file
|
47
71
|
config_file = args.config
|
@@ -66,6 +90,7 @@ def main(src_dir):
|
|
66
90
|
|
67
91
|
if util.config_val("EXP", "no_warnings", False):
|
68
92
|
import warnings
|
93
|
+
|
69
94
|
warnings.filterwarnings("ignore")
|
70
95
|
|
71
96
|
# Load the data
|
@@ -74,7 +99,8 @@ def main(src_dir):
|
|
74
99
|
# Split into train and test
|
75
100
|
expr.fill_train_and_tests()
|
76
101
|
util.debug(
|
77
|
-
f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
|
102
|
+
f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}"
|
103
|
+
)
|
78
104
|
|
79
105
|
sample_selection = util.config_val(
|
80
106
|
"RESAMPLE", "sample_selection", "all")
|
nkululeko/utils/util.py
CHANGED
@@ -134,6 +134,12 @@ class Util:
|
|
134
134
|
pd_series.dtype, pd.CategoricalDtype
|
135
135
|
)
|
136
136
|
|
137
|
+
def get_name(self):
|
138
|
+
"""
|
139
|
+
Get the name of the experiment
|
140
|
+
"""
|
141
|
+
return self.config["EXP"]["name"]
|
142
|
+
|
137
143
|
def get_exp_dir(self):
|
138
144
|
"""
|
139
145
|
Get the experiment directory
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.86.0
|
3
|
+
Version: 0.86.1
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -256,6 +256,7 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
|
|
256
256
|
* [Compare several databases](http://blog.syntheticspeech.de/2024/01/02/nkululeko-compare-several-databases/)
|
257
257
|
* [Tweak the target variable for database comparison](http://blog.syntheticspeech.de/2024/03/13/nkululeko-how-to-tweak-the-target-variable-for-database-comparison/)
|
258
258
|
* [How to run multiple experiments in one go](http://blog.syntheticspeech.de/2022/03/28/how-to-run-multiple-experiments-in-one-go-with-nkululeko/)
|
259
|
+
* [How to finetune a transformer-model](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
|
259
260
|
|
260
261
|
### <a name="helloworld">Hello World example</a>
|
261
262
|
* NEW: [Here's a Google colab that runs this example out-of-the-box](https://colab.research.google.com/drive/1GYNBd5cdZQ1QC3Jm58qoeMaJg3UuPhjw?usp=sharing#scrollTo=4G_SjuF9xeQf), and here is the same [with Kaggle](https://www.kaggle.com/felixburk/nkululeko-hello-world-example)
|
@@ -333,6 +334,11 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
|
|
333
334
|
Changelog
|
334
335
|
=========
|
335
336
|
|
337
|
+
Version 0.86.1
|
338
|
+
--------------
|
339
|
+
* functionality to push to hub
|
340
|
+
* fixed bug that prevented wavlm finetuning
|
341
|
+
|
336
342
|
Version 0.86.0
|
337
343
|
--------------
|
338
344
|
* added regression to finetuning
|
@@ -2,11 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
|
2
2
|
nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
|
3
3
|
nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
|
4
4
|
nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
|
5
|
-
nkululeko/constants.py,sha256=
|
5
|
+
nkululeko/constants.py,sha256=pZ3DZYgXdEpxfaj-mnI6q21TyYMa2QQG_sKa6CBxCCA,39
|
6
6
|
nkululeko/demo.py,sha256=8bl15Kitoesnz8oa8yrs52T6YCSOhWbbq9PnZ8Hj6D0,3232
|
7
7
|
nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
|
8
8
|
nkululeko/demo_predictor.py,sha256=es56xbT8ifkS_vnrlb5NTZT54gNmeUtNlA4zVA_gnN8,4757
|
9
|
-
nkululeko/experiment.py,sha256=
|
9
|
+
nkululeko/experiment.py,sha256=24FmvF9_zNXE86fO6gzss1M-BjceOCiV6nyJAs0SM_Y,30986
|
10
10
|
nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
|
11
11
|
nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
|
12
12
|
nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzwM,3976
|
@@ -19,7 +19,7 @@ nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
|
|
19
19
|
nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
|
20
20
|
nkululeko/plots.py,sha256=nd9tF_61DyAx7oGZF8gTrHXazkgFjFe4eClxu1nQ_XU,23276
|
21
21
|
nkululeko/predict.py,sha256=sF091sSSLnEWcISx9ZcULLie3tY5XeFsQJd6b3vrxFg,2409
|
22
|
-
nkululeko/resample.py,sha256=
|
22
|
+
nkululeko/resample.py,sha256=2d9eao_0sLrGZ_KSl8OVKsPor3BkFrlmMhrpB9WelIs,4267
|
23
23
|
nkululeko/runmanager.py,sha256=eTM1DNQKt1lxYhzt4vZyZluPXW9sWlIJHNQzex4lkJU,7624
|
24
24
|
nkululeko/scaler.py,sha256=4nkIqoajkIkuTPK0Z02ifMN_awl6fP_i-GBYdoGYgGM,4101
|
25
25
|
nkululeko/segment.py,sha256=YLKckX44tbvTb3LrdgYw9X4guzuF27sutl92z9DkpZU,4835
|
@@ -88,7 +88,7 @@ nkululeko/models/model_svm.py,sha256=rsME3KvKvNG7bdE5lbvYUu85WZhaASZxxmdNDIVJRZ4
|
|
88
88
|
nkululeko/models/model_svr.py,sha256=_YZeksqB3eBENGlg3g9RwYFlk9rQQ-XCeNBKLlGGVoE,725
|
89
89
|
nkululeko/models/model_tree.py,sha256=rf16faUm4o2LJgkoYpeY998b8DQIvXZ73_m1IS3TnnE,417
|
90
90
|
nkululeko/models/model_tree_reg.py,sha256=IgQcPTE-304HQLYSKPF8Z4ot_Ur9dH01fZjS0nXke_M,428
|
91
|
-
nkululeko/models/model_tuned.py,sha256=
|
91
|
+
nkululeko/models/model_tuned.py,sha256=eiSKFmObn9_VNTqF1lZvWbyyWxvhy1PVjOiIcs3YiGA,18379
|
92
92
|
nkululeko/models/model_xgb.py,sha256=Thgx5ESdIok4v72mKh4plxpo4smGcKALWNCJTDScY0M,447
|
93
93
|
nkululeko/models/model_xgr.py,sha256=aGBtNGLWjOE_2rICGYGFxmT8DtnHYsIl1lIpMtghHsY,418
|
94
94
|
nkululeko/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -104,9 +104,9 @@ nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5Ev
|
|
104
104
|
nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
105
105
|
nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
|
106
106
|
nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
|
107
|
-
nkululeko/utils/util.py,sha256=
|
108
|
-
nkululeko-0.86.
|
109
|
-
nkululeko-0.86.
|
110
|
-
nkululeko-0.86.
|
111
|
-
nkululeko-0.86.
|
112
|
-
nkululeko-0.86.
|
107
|
+
nkululeko/utils/util.py,sha256=mK1MgO14NinrPhavJw72eR_2WN_kBKjVKiEJnzvdO1Q,13946
|
108
|
+
nkululeko-0.86.1.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
109
|
+
nkululeko-0.86.1.dist-info/METADATA,sha256=LXoMlzo5QBzABv0fpIDvf4nYDjCJkRCZL1XmffikrRc,37088
|
110
|
+
nkululeko-0.86.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
111
|
+
nkululeko-0.86.1.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
112
|
+
nkululeko-0.86.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|