nkululeko 0.61.0__tar.gz → 0.62.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. {nkululeko-0.61.0 → nkululeko-0.62.1}/CHANGELOG.md +11 -0
  2. {nkululeko-0.61.0/nkululeko.egg-info → nkululeko-0.62.1}/PKG-INFO +12 -1
  3. nkululeko-0.62.1/nkululeko/constants.py +1 -0
  4. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/feature_extractor.py +4 -4
  5. nkululeko-0.62.1/nkululeko/resample.py +63 -0
  6. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/segment.py +10 -15
  7. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/util.py +31 -28
  8. {nkululeko-0.61.0 → nkululeko-0.62.1/nkululeko.egg-info}/PKG-INFO +12 -1
  9. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko.egg-info/SOURCES.txt +1 -0
  10. nkululeko-0.61.0/nkululeko/constants.py +0 -1
  11. {nkululeko-0.61.0 → nkululeko-0.62.1}/LICENSE +0 -0
  12. {nkululeko-0.61.0 → nkululeko-0.62.1}/README.md +0 -0
  13. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/__init__.py +0 -0
  14. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/augment.py +0 -0
  15. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/balancer.py +0 -0
  16. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/cacheddataset.py +0 -0
  17. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/demo.py +0 -0
  18. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/demo_predictor.py +0 -0
  19. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/experiment.py +0 -0
  20. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/explore.py +0 -0
  21. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/file_checker.py +0 -0
  22. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/filter_data.py +0 -0
  23. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/glob_conf.py +0 -0
  24. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/modelrunner.py +0 -0
  25. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/nkululeko.py +0 -0
  26. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/plots.py +0 -0
  27. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/predict.py +0 -0
  28. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/reporter.py +0 -0
  29. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/result.py +0 -0
  30. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/runmanager.py +0 -0
  31. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/scaler.py +0 -0
  32. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/syllable_nuclei.py +0 -0
  33. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/test.py +0 -0
  34. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko/test_predictor.py +0 -0
  35. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko.egg-info/dependency_links.txt +0 -0
  36. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko.egg-info/requires.txt +0 -0
  37. {nkululeko-0.61.0 → nkululeko-0.62.1}/nkululeko.egg-info/top_level.txt +0 -0
  38. {nkululeko-0.61.0 → nkululeko-0.62.1}/pyproject.toml +0 -0
  39. {nkululeko-0.61.0 → nkululeko-0.62.1}/setup.cfg +0 -0
  40. {nkululeko-0.61.0 → nkululeko-0.62.1}/setup.py +0 -0
@@ -1,6 +1,17 @@
1
1
  Changelog
2
2
  =========
3
3
 
4
+ Version 0.62.1
5
+ --------------
6
+ * added min and max_length for silero segmentation
7
+
8
+ Version 0.62.0
9
+ --------------
10
+ * fixed segment silero bug
11
+ * added all Wav2vec2 models
12
+ * added resampler module
13
+ * added error on file for embeddings
14
+
4
15
  Version 0.61.0
5
16
  --------------
6
17
  * added HUBERT embeddings
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.61.0
3
+ Version: 0.62.1
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -228,6 +228,17 @@ Nkululeko can be used under the [MIT license](https://choosealicense.com/license
228
228
  Changelog
229
229
  =========
230
230
 
231
+ Version 0.62.1
232
+ --------------
233
+ * added min and max_length for silero segmentation
234
+
235
+ Version 0.62.0
236
+ --------------
237
+ * fixed segment silero bug
238
+ * added all Wav2vec2 models
239
+ * added resampler module
240
+ * added error on file for embeddings
241
+
231
242
  Version 0.61.0
232
243
  --------------
233
244
  * added HUBERT embeddings
@@ -0,0 +1 @@
1
+ VERSION = '0.62.1'
@@ -50,13 +50,13 @@ class FeatureExtractor:
50
50
  self.featExtractor = TRILLset(
51
51
  f"{store_name}_{self.feats_designation}", self.data_df
52
52
  )
53
- elif feats_type == "wav2vec":
53
+ elif feats_type.startswith("wav2vec2"):
54
54
  from nkululeko.feat_extract.feats_wav2vec2 import Wav2vec2
55
55
  self.featExtractor = Wav2vec2(
56
- f"{store_name}_{self.feats_designation}", self.data_df
56
+ f"{store_name}_{self.feats_designation}", self.data_df,
57
+ feats_type
57
58
  )
58
- elif feats_type in ("hubert", "hubert_ft", "hubert_large",
59
- "hubert_xlarge", "hubert_xlarge_ft"):
59
+ elif feats_type.startswith("hubert"):
60
60
  from nkululeko.feat_extract.feats_hubert import Hubert
61
61
  self.featExtractor = Hubert(
62
62
  f"{store_name}_{self.feats_designation}", self.data_df,
@@ -0,0 +1,63 @@
1
+ # resample.py
2
+ # change the sampling rate for train and test splits
3
+
4
+ from nkululeko.experiment import Experiment
5
+ import configparser
6
+ from nkululeko.util import Util
7
+ from nkululeko.constants import VERSION
8
+ import argparse
9
+ import os
10
+ import pandas as pd
11
+ from nkululeko.augmenting .resampler import Resampler
12
+
13
+ def main(src_dir):
14
+ parser = argparse.ArgumentParser(description='Call the nkululeko framework.')
15
+ parser.add_argument('--config', default='exp.ini', help='The base configuration')
16
+ args = parser.parse_args()
17
+ if args.config is not None:
18
+ config_file = args.config
19
+ else:
20
+ config_file = f'{src_dir}/exp.ini'
21
+
22
+ # test if the configuration file exists
23
+ if not os.path.isfile(config_file):
24
+ print(f'ERROR: no such file: {config_file}')
25
+ exit()
26
+
27
+ # load one configuration per experiment
28
+ config = configparser.ConfigParser()
29
+ config.read(config_file)
30
+ # create a new experiment
31
+ expr = Experiment(config)
32
+ util = Util('resample')
33
+ util.debug(f'running {expr.name} from config {config_file}, nkululeko version {VERSION}')
34
+
35
+ if util.config_val('EXP', 'no_warnings', False):
36
+ import warnings
37
+ warnings.filterwarnings("ignore")
38
+
39
+ # load the data
40
+ expr.load_datasets()
41
+
42
+ # split into train and test
43
+ expr.fill_train_and_tests()
44
+ util.debug(f'train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}')
45
+
46
+ sample_selection = util.config_val('RESAMPLE', 'sample_selection', 'all')
47
+ if sample_selection=='all':
48
+ df = pd.concat([expr.df_train, expr.df_test])
49
+ elif sample_selection=='train':
50
+ df = expr.df_train
51
+ elif sample_selection=='test':
52
+ df = expr.df_test
53
+ else:
54
+ util.error(f'unknown selection specifier {sample_selection}, should be [all | train | test]')
55
+ util.debug(f'resampling {sample_selection}: {df.shape[0]} samples')
56
+ rs = Resampler(df)
57
+ rs.resample()
58
+ print('DONE')
59
+
60
+
61
+ if __name__ == "__main__":
62
+ cwd = os.path.dirname(os.path.abspath(__file__))
63
+ main(cwd) # use this if you want to state the config file path on command line
@@ -44,16 +44,6 @@ def main(src_dir):
44
44
 
45
45
  # segment
46
46
  segment_target = util.config_val('SEGMENT', 'target', '_seg')
47
- # this if a specific dataset is to be segmented
48
- # segment_db = util.config_val('DATA', 'segment', False)
49
- # if segment_db:
50
- # for dataset in expr.datasets.keys:
51
- # if segment_db == dataset:
52
- # df = expr.datasets[dataset].df
53
- # util.debug(f'segmenting {dataset}')
54
- # df_seg = segment_dataframe(df)
55
- # name = f'{dataset}{segment_target}'
56
- # df_seg.to_csv(f'{expr.data_dir}/{name}.csv')
57
47
 
58
48
  segmenter = util.config_val('SEGMENT', 'method', 'silero')
59
49
  sample_selection = util.config_val('SEGMENT', 'sample_selection', 'all')
@@ -65,8 +55,6 @@ def main(src_dir):
65
55
  df = expr.df_test
66
56
  else:
67
57
  util.error(f'unknown segmentation selection specifier {sample_selection}, should be [all | train | test]')
68
- # if "duration" not in df.columns:
69
- # df = df.drop(columns=['duration'], inplace=True)
70
58
  util.debug(f'segmenting {sample_selection}: {df.shape[0]} samples with {segmenter}')
71
59
  if segmenter=='silero':
72
60
  from nkululeko.segmenting.seg_silero import Silero_segmenter
@@ -86,14 +74,21 @@ def main(src_dir):
86
74
  df['duration'] = df.index.to_series().map(lambda x:calc_dur(x))
87
75
  num_before = df.shape[0]
88
76
  num_after = df_seg.shape[0]
89
- dataname = '_'.join(expr.datasets.keys())
90
- name = f'{dataname}{segment_target}'
91
- df_seg.to_csv(f'{expr.data_dir}/{name}.csv')
77
+ # plot distributions
92
78
  from nkululeko.plots import Plots
93
79
  plots = Plots()
94
80
  plots.plot_durations(df, 'original_durations', sample_selection)
95
81
  plots.plot_durations(df_seg, 'segmented_durations', sample_selection)
96
82
  print('')
83
+ # remove encoded labels
84
+ target = util.config_val('DATA', 'target', 'emotion')
85
+ if 'class_label' in df_seg.columns:
86
+ df_seg = df_seg.drop(columns=[target])
87
+ df_seg = df_seg.rename(columns={'class_label':target})
88
+ # save file
89
+ dataname = '_'.join(expr.datasets.keys())
90
+ name = f'{dataname}{segment_target}'
91
+ df_seg.to_csv(f'{expr.data_dir}/{name}.csv')
97
92
  util.debug(f'saved {name}.csv to {expr.data_dir}, {num_after} samples (was {num_before})')
98
93
  print('DONE')
99
94
 
@@ -2,7 +2,6 @@
2
2
  import audeer
3
3
  import ast
4
4
  import sys
5
- import nkululeko.glob_conf as glob_conf
6
5
  import numpy as np
7
6
  import os.path
8
7
  import configparser
@@ -11,16 +10,20 @@ import pandas as pd
11
10
 
12
11
  class Util:
13
12
 
13
+ # default values for which no message is logged when they are used as fallback
14
14
  stopvals = [False, 'False', 'classification', 'png']
15
15
 
16
- def __init__(self, caller=None):
17
- self.got_data_roots = self.config_val('DATA', 'root_folders', False)
18
- if self.got_data_roots:
19
- # if there is a global data rootfolder file, read from there
20
- if not os.path.isfile(self.got_data_roots):
21
- self.error(f'no such file: {self.got_data_roots}')
22
- self.data_roots = configparser.ConfigParser()
23
- self.data_roots.read(self.got_data_roots)
16
+ def __init__(self, caller=None, has_config=True):
17
+ if has_config:
18
+ import nkululeko.glob_conf as glob_conf
19
+ self.config = glob_conf.config
20
+ self.got_data_roots = self.config_val('DATA', 'root_folders', False)
21
+ if self.got_data_roots:
22
+ # if there is a global data rootfolder file, read from there
23
+ if not os.path.isfile(self.got_data_roots):
24
+ self.error(f'no such file: {self.got_data_roots}')
25
+ self.data_roots = configparser.ConfigParser()
26
+ self.data_roots.read(self.got_data_roots)
24
27
  if caller is not None:
25
28
  self.caller = caller
26
29
  else:
@@ -30,10 +33,10 @@ class Util:
30
33
  """
31
34
  This method allows the user to get the directory path for the given argument.
32
35
  """
33
- root = glob_conf.config['EXP']['root']
34
- name = glob_conf.config['EXP']['name']
36
+ root = self.config['EXP']['root']
37
+ name = self.config['EXP']['name']
35
38
  try:
36
- entryn = glob_conf.config['EXP'][entry]
39
+ entryn = self.config['EXP'][entry]
37
40
  except KeyError:
38
41
  # some default values
39
42
  if entry == 'fig_dir':
@@ -61,7 +64,7 @@ class Util:
61
64
  If the value is present in the experiment configuration it will be used, else
62
65
  we look in a global file specified by the root_folders value.
63
66
  """
64
- configuration = glob_conf.config
67
+ configuration = self.config
65
68
  try:
66
69
  if len(key)>0:
67
70
  return configuration['DATA'][dataset+'.'+key]
@@ -95,8 +98,8 @@ class Util:
95
98
  """
96
99
  Get the experiment directory
97
100
  """
98
- root = glob_conf.config['EXP']['root']
99
- name = glob_conf.config['EXP']['name']
101
+ root = self.config['EXP']['root']
102
+ name = self.config['EXP']['name']
100
103
  dir_name = f'{root}{name}'
101
104
  audeer.mkdir(dir_name)
102
105
  return dir_name
@@ -124,19 +127,19 @@ class Util:
124
127
  """
125
128
  Get a string as name from all databases that are used
126
129
  """
127
- return '_'.join(ast.literal_eval(glob_conf.config['DATA']['databases']))
130
+ return '_'.join(ast.literal_eval(self.config['DATA']['databases']))
128
131
 
129
132
  def get_exp_name(self, only_train = False, only_data = False):
130
133
  trains_val = self.config_val('DATA', 'trains', False)
131
134
  if only_train and trains_val:
132
135
  # try to get only the train tables
133
- ds = '_'.join(ast.literal_eval(glob_conf.config['DATA']['trains']))
136
+ ds = '_'.join(ast.literal_eval(self.config['DATA']['trains']))
134
137
  else:
135
- ds = '_'.join(ast.literal_eval(glob_conf.config['DATA']['databases']))
138
+ ds = '_'.join(ast.literal_eval(self.config['DATA']['databases']))
136
139
  mt = ''
137
140
  if not only_data:
138
- mt = f'_{glob_conf.config["MODEL"]["type"]}'
139
- ft = '_'.join(ast.literal_eval(glob_conf.config['FEATS']['type']))
141
+ mt = f'_{self.config["MODEL"]["type"]}'
142
+ ft = '_'.join(ast.literal_eval(self.config['FEATS']['type']))
140
143
  ft += '_'
141
144
  set = self.config_val('FEATS', 'set', False)
142
145
  set_string = ''
@@ -159,7 +162,7 @@ class Util:
159
162
 
160
163
  def get_plot_name(self):
161
164
  try:
162
- plot_name = glob_conf.config['PLOT']['name']
165
+ plot_name = self.config['PLOT']['name']
163
166
  except KeyError:
164
167
  plot_name = self.get_exp_name()
165
168
  return plot_name
@@ -183,10 +186,10 @@ class Util:
183
186
  def set_config_val(self, section, key, value):
184
187
  try:
185
188
  # does the section already exist?
186
- glob_conf.config[section][key] = str(value)
189
+ self.config[section][key] = str(value)
187
190
  except KeyError:
188
- glob_conf.config.add_section(section)
189
- glob_conf.config[section][key] = str(value)
191
+ self.config.add_section(section)
192
+ self.config[section][key] = str(value)
190
193
 
191
194
  def check_df(self, i, df):
192
195
  """Check a dataframe"""
@@ -195,7 +198,7 @@ class Util:
195
198
  )
196
199
  def config_val(self, section, key, default):
197
200
  try:
198
- return glob_conf.config[section][key]
201
+ return self.config[section][key]
199
202
  except KeyError:
200
203
  if not default in self.stopvals:
201
204
  self.debug(f'value for {key} not found, using default: {default}')
@@ -203,7 +206,7 @@ class Util:
203
206
 
204
207
  def config_val_list(self, section, key, default):
205
208
  try:
206
- return ast.literal_eval(glob_conf.config[section][key])
209
+ return ast.literal_eval(self.config[section][key])
207
210
  except KeyError:
208
211
  if not default in self.stopvals:
209
212
  self.debug(f'value for {key} not found, using default: {default}')
@@ -213,11 +216,11 @@ class Util:
213
216
  # try:
214
217
  # labels = glob_conf.label_encoder.classes_
215
218
  # except AttributeError:
216
- labels = ast.literal_eval(glob_conf.config['DATA']['labels'])
219
+ labels = ast.literal_eval(self.config['DATA']['labels'])
217
220
  return labels
218
221
 
219
222
  def continuous_to_categorical(self, array):
220
- bins = ast.literal_eval(glob_conf.config['DATA']['bins'])
223
+ bins = ast.literal_eval(self.config['DATA']['bins'])
221
224
  result = np.digitize(array, bins)-1
222
225
  return result
223
226
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.61.0
3
+ Version: 0.62.1
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -228,6 +228,17 @@ Nkululeko can be used under the [MIT license](https://choosealicense.com/license
228
228
  Changelog
229
229
  =========
230
230
 
231
+ Version 0.62.1
232
+ --------------
233
+ * added min and max_length for silero segmentation
234
+
235
+ Version 0.62.0
236
+ --------------
237
+ * fixed segment silero bug
238
+ * added all Wav2vec2 models
239
+ * added resampler module
240
+ * added error on file for embeddings
241
+
231
242
  Version 0.61.0
232
243
  --------------
233
244
  * added HUBERT embeddings
@@ -22,6 +22,7 @@ nkululeko/nkululeko.py
22
22
  nkululeko/plots.py
23
23
  nkululeko/predict.py
24
24
  nkululeko/reporter.py
25
+ nkululeko/resample.py
25
26
  nkululeko/result.py
26
27
  nkululeko/runmanager.py
27
28
  nkululeko/scaler.py
@@ -1 +0,0 @@
1
- VERSION = '0.61.0'
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes