nkululeko 0.86.0__py3-none-any.whl → 0.86.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.86.0"
1
+ VERSION="0.86.1"
2
2
  SAMPLING_RATE = 16000
nkululeko/experiment.py CHANGED
@@ -72,8 +72,9 @@ class Experiment:
72
72
  if self.util.config_val("REPORT", "latex", False):
73
73
  self.report.export_latex()
74
74
 
75
- def get_name(self):
76
- return self.util.get_exp_name()
75
+ # moved to util
76
+ # def get_name(self):
77
+ # return self.util.get_exp_name()
77
78
 
78
79
  def set_globals(self, config_obj):
79
80
  """install a config object in the global space"""
@@ -64,7 +64,8 @@ class TunedModel(BaseModel):
64
64
 
65
65
  def _init_model(self):
66
66
  model_path = "facebook/wav2vec2-large-robust-ft-swbd-300h"
67
- pretrained_model = self.util.config_val("MODEL", "pretrained_model", model_path)
67
+ pretrained_model = self.util.config_val(
68
+ "MODEL", "pretrained_model", model_path)
68
69
  self.num_layers = None
69
70
  self.sampling_rate = 16000
70
71
  self.max_duration_sec = 8.0
@@ -95,6 +96,7 @@ class TunedModel(BaseModel):
95
96
 
96
97
  # load pre-trained model
97
98
  if self.is_classifier:
99
+ self.util.debug(f"Task is classification.")
98
100
  le = glob_conf.label_encoder
99
101
  mapping = dict(zip(le.classes_, range(len(le.classes_))))
100
102
  target_mapping = {k: int(v) for k, v in mapping.items()}
@@ -102,15 +104,16 @@ class TunedModel(BaseModel):
102
104
  value: key for key, value in target_mapping.items()
103
105
  }
104
106
  self.config = transformers.AutoConfig.from_pretrained(
105
- model_path,
107
+ pretrained_model,
106
108
  num_labels=len(target_mapping),
107
109
  label2id=target_mapping,
108
110
  id2label=target_mapping_reverse,
109
111
  finetuning_task=target_name,
110
112
  )
111
113
  else:
114
+ self.util.debug(f"Task is regression.")
112
115
  self.config = transformers.AutoConfig.from_pretrained(
113
- model_path,
116
+ pretrained_model,
114
117
  num_labels=1,
115
118
  finetuning_task=target_name,
116
119
  )
@@ -253,6 +256,9 @@ class TunedModel(BaseModel):
253
256
  else:
254
257
  criterion = ConcordanceCorCoeff()
255
258
 
259
+ # set push_to_hub value, default false
260
+ push = self.util.config_val("MODEL", "push_to_hub", False)
261
+
256
262
  class Trainer(transformers.Trainer):
257
263
  def compute_loss(
258
264
  self,
@@ -299,6 +305,8 @@ class TunedModel(BaseModel):
299
305
  load_best_model_at_end=True,
300
306
  remove_unused_columns=False,
301
307
  report_to="none",
308
+ push_to_hub=push,
309
+ hub_model_id=f"{self.util.get_name()}",
302
310
  )
303
311
 
304
312
  trainer = Trainer(
nkululeko/resample.py CHANGED
@@ -11,22 +11,32 @@ from nkululeko.utils.util import Util
11
11
 
12
12
  from nkululeko.constants import VERSION
13
13
  from nkululeko.experiment import Experiment
14
+ from nkululeko.utils.files import find_files
14
15
 
15
16
 
16
17
  def main(src_dir):
17
18
  parser = argparse.ArgumentParser(
18
- description="Call the nkululeko RESAMPLE framework.")
19
+ description="Call the nkululeko RESAMPLE framework."
20
+ )
19
21
  parser.add_argument("--config", default=None,
20
22
  help="The base configuration")
21
23
  parser.add_argument("--file", default=None,
22
24
  help="The input audio file to resample")
23
- parser.add_argument("--replace", action="store_true",
24
- help="Replace the original audio file")
25
+ parser.add_argument(
26
+ "--folder",
27
+ default=None,
28
+ help="The input directory containing audio files and subdirectories to resample",
29
+ )
30
+ parser.add_argument(
31
+ "--replace", action="store_true", help="Replace the original audio file"
32
+ )
25
33
 
26
34
  args = parser.parse_args()
27
35
 
28
- if args.file is None and args.config is None:
29
- print("ERROR: Either --file or --config argument must be provided.")
36
+ if args.file is None and args.folder is None and args.config is None:
37
+ print(
38
+ "ERROR: Either --file, --folder, or --config argument must be provided."
39
+ )
30
40
  exit()
31
41
 
32
42
  if args.file is not None:
@@ -42,6 +52,20 @@ def main(src_dir):
42
52
  util.debug(f"Resampling audio file: {args.file}")
43
53
  rs = Resampler(df_sample, not_testing=True, replace=args.replace)
44
54
  rs.resample()
55
+ elif args.folder is not None:
56
+ # Load all audio files in the directory and its subdirectories into a DataFrame
57
+ files = find_files(args.folder, relative=True, ext=["wav"])
58
+ files = pd.Series(files)
59
+ df_sample = pd.DataFrame(index=files)
60
+ df_sample.index = audformat.utils.to_segmented_index(
61
+ df_sample.index, allow_nat=False
62
+ )
63
+
64
+ # Resample the audio files
65
+ util = Util("resampler", has_config=False)
66
+ util.debug(f"Resampling audio files in directory: {args.folder}")
67
+ rs = Resampler(df_sample, not_testing=True, replace=args.replace)
68
+ rs.resample()
45
69
  else:
46
70
  # Existing code for handling INI file
47
71
  config_file = args.config
@@ -66,6 +90,7 @@ def main(src_dir):
66
90
 
67
91
  if util.config_val("EXP", "no_warnings", False):
68
92
  import warnings
93
+
69
94
  warnings.filterwarnings("ignore")
70
95
 
71
96
  # Load the data
@@ -74,7 +99,8 @@ def main(src_dir):
74
99
  # Split into train and test
75
100
  expr.fill_train_and_tests()
76
101
  util.debug(
77
- f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
102
+ f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}"
103
+ )
78
104
 
79
105
  sample_selection = util.config_val(
80
106
  "RESAMPLE", "sample_selection", "all")
nkululeko/utils/util.py CHANGED
@@ -134,6 +134,12 @@ class Util:
134
134
  pd_series.dtype, pd.CategoricalDtype
135
135
  )
136
136
 
137
+ def get_name(self):
138
+ """
139
+ Get the name of the experiment
140
+ """
141
+ return self.config["EXP"]["name"]
142
+
137
143
  def get_exp_dir(self):
138
144
  """
139
145
  Get the experiment directory
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.86.0
3
+ Version: 0.86.1
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -256,6 +256,7 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
256
256
  * [Compare several databases](http://blog.syntheticspeech.de/2024/01/02/nkululeko-compare-several-databases/)
257
257
  * [Tweak the target variable for database comparison](http://blog.syntheticspeech.de/2024/03/13/nkululeko-how-to-tweak-the-target-variable-for-database-comparison/)
258
258
  * [How to run multiple experiments in one go](http://blog.syntheticspeech.de/2022/03/28/how-to-run-multiple-experiments-in-one-go-with-nkululeko/)
259
+ * [How to finetune a transformer-model](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
259
260
 
260
261
  ### <a name="helloworld">Hello World example</a>
261
262
  * NEW: [Here's a Google colab that runs this example out-of-the-box](https://colab.research.google.com/drive/1GYNBd5cdZQ1QC3Jm58qoeMaJg3UuPhjw?usp=sharing#scrollTo=4G_SjuF9xeQf), and here is the same [with Kaggle](https://www.kaggle.com/felixburk/nkululeko-hello-world-example)
@@ -333,6 +334,11 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
333
334
  Changelog
334
335
  =========
335
336
 
337
+ Version 0.86.1
338
+ --------------
339
+ * functionality to push to hub
340
+ * fixed bug that prevented wavlm finetuning
341
+
336
342
  Version 0.86.0
337
343
  --------------
338
344
  * added regression to finetuning
@@ -2,11 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
3
3
  nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
4
4
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
5
- nkululeko/constants.py,sha256=hvi1X27m7vcqkB_Rgl7alourAusZB1mjPxdW4ChdVyU,39
5
+ nkululeko/constants.py,sha256=pZ3DZYgXdEpxfaj-mnI6q21TyYMa2QQG_sKa6CBxCCA,39
6
6
  nkululeko/demo.py,sha256=8bl15Kitoesnz8oa8yrs52T6YCSOhWbbq9PnZ8Hj6D0,3232
7
7
  nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
8
8
  nkululeko/demo_predictor.py,sha256=es56xbT8ifkS_vnrlb5NTZT54gNmeUtNlA4zVA_gnN8,4757
9
- nkululeko/experiment.py,sha256=gUJsBMWuadqxEVzuPVToQzFHC9FRUadptP49kTcBiGs,30962
9
+ nkululeko/experiment.py,sha256=24FmvF9_zNXE86fO6gzss1M-BjceOCiV6nyJAs0SM_Y,30986
10
10
  nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
11
11
  nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
12
12
  nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzwM,3976
@@ -19,7 +19,7 @@ nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
19
19
  nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
20
20
  nkululeko/plots.py,sha256=nd9tF_61DyAx7oGZF8gTrHXazkgFjFe4eClxu1nQ_XU,23276
21
21
  nkululeko/predict.py,sha256=sF091sSSLnEWcISx9ZcULLie3tY5XeFsQJd6b3vrxFg,2409
22
- nkululeko/resample.py,sha256=IPtYqU0nhZ-CqO_O1jJN0EvpfjxHZdFRwdTpEJOVuaQ,3354
22
+ nkululeko/resample.py,sha256=2d9eao_0sLrGZ_KSl8OVKsPor3BkFrlmMhrpB9WelIs,4267
23
23
  nkululeko/runmanager.py,sha256=eTM1DNQKt1lxYhzt4vZyZluPXW9sWlIJHNQzex4lkJU,7624
24
24
  nkululeko/scaler.py,sha256=4nkIqoajkIkuTPK0Z02ifMN_awl6fP_i-GBYdoGYgGM,4101
25
25
  nkululeko/segment.py,sha256=YLKckX44tbvTb3LrdgYw9X4guzuF27sutl92z9DkpZU,4835
@@ -88,7 +88,7 @@ nkululeko/models/model_svm.py,sha256=rsME3KvKvNG7bdE5lbvYUu85WZhaASZxxmdNDIVJRZ4
88
88
  nkululeko/models/model_svr.py,sha256=_YZeksqB3eBENGlg3g9RwYFlk9rQQ-XCeNBKLlGGVoE,725
89
89
  nkululeko/models/model_tree.py,sha256=rf16faUm4o2LJgkoYpeY998b8DQIvXZ73_m1IS3TnnE,417
90
90
  nkululeko/models/model_tree_reg.py,sha256=IgQcPTE-304HQLYSKPF8Z4ot_Ur9dH01fZjS0nXke_M,428
91
- nkululeko/models/model_tuned.py,sha256=J5CemIAW_WhZIQgppFgPChrsMJvGYzJlCvJC8O62l9M,18049
91
+ nkululeko/models/model_tuned.py,sha256=eiSKFmObn9_VNTqF1lZvWbyyWxvhy1PVjOiIcs3YiGA,18379
92
92
  nkululeko/models/model_xgb.py,sha256=Thgx5ESdIok4v72mKh4plxpo4smGcKALWNCJTDScY0M,447
93
93
  nkululeko/models/model_xgr.py,sha256=aGBtNGLWjOE_2rICGYGFxmT8DtnHYsIl1lIpMtghHsY,418
94
94
  nkululeko/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -104,9 +104,9 @@ nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5Ev
104
104
  nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
105
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
106
106
  nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
107
- nkululeko/utils/util.py,sha256=b1IHFucRNuF9Iyv5IJeK4AEg0Rga0xKG80UM5GWWdHA,13816
108
- nkululeko-0.86.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
109
- nkululeko-0.86.0.dist-info/METADATA,sha256=KrHrjQ6rc4oGxN4EJ_TuZ0dVGGI-qIxw8dY1RBTCnLo,36852
110
- nkululeko-0.86.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
111
- nkululeko-0.86.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
112
- nkululeko-0.86.0.dist-info/RECORD,,
107
+ nkululeko/utils/util.py,sha256=mK1MgO14NinrPhavJw72eR_2WN_kBKjVKiEJnzvdO1Q,13946
108
+ nkululeko-0.86.1.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
109
+ nkululeko-0.86.1.dist-info/METADATA,sha256=LXoMlzo5QBzABv0fpIDvf4nYDjCJkRCZL1XmffikrRc,37088
110
+ nkululeko-0.86.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
111
+ nkululeko-0.86.1.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
112
+ nkululeko-0.86.1.dist-info/RECORD,,