nkululeko 0.61.0__py3-none-any.whl → 0.62.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1 +1 @@
1
- VERSION = '0.61.0'
1
+ VERSION = '0.62.0'
@@ -50,13 +50,13 @@ class FeatureExtractor:
50
50
  self.featExtractor = TRILLset(
51
51
  f"{store_name}_{self.feats_designation}", self.data_df
52
52
  )
53
- elif feats_type == "wav2vec":
53
+ elif feats_type.startswith("wav2vec2"):
54
54
  from nkululeko.feat_extract.feats_wav2vec2 import Wav2vec2
55
55
  self.featExtractor = Wav2vec2(
56
- f"{store_name}_{self.feats_designation}", self.data_df
56
+ f"{store_name}_{self.feats_designation}", self.data_df,
57
+ feats_type
57
58
  )
58
- elif feats_type in ("hubert", "hubert_ft", "hubert_large",
59
- "hubert_xlarge", "hubert_xlarge_ft"):
59
+ elif feats_type.startswith("hubert"):
60
60
  from nkululeko.feat_extract.feats_hubert import Hubert
61
61
  self.featExtractor = Hubert(
62
62
  f"{store_name}_{self.feats_designation}", self.data_df,
nkululeko/resample.py ADDED
@@ -0,0 +1,63 @@
1
+ # resample.py
2
+ # change the sampling rate for train and test splits
3
+
4
+ from nkululeko.experiment import Experiment
5
+ import configparser
6
+ from nkululeko.util import Util
7
+ from nkululeko.constants import VERSION
8
+ import argparse
9
+ import os
10
+ import pandas as pd
11
+ from nkululeko.augmenting .resampler import Resampler
12
+
13
+ def main(src_dir):
14
+ parser = argparse.ArgumentParser(description='Call the nkululeko framework.')
15
+ parser.add_argument('--config', default='exp.ini', help='The base configuration')
16
+ args = parser.parse_args()
17
+ if args.config is not None:
18
+ config_file = args.config
19
+ else:
20
+ config_file = f'{src_dir}/exp.ini'
21
+
22
+ # test if the configuration file exists
23
+ if not os.path.isfile(config_file):
24
+ print(f'ERROR: no such file: {config_file}')
25
+ exit()
26
+
27
+ # load one configuration per experiment
28
+ config = configparser.ConfigParser()
29
+ config.read(config_file)
30
+ # create a new experiment
31
+ expr = Experiment(config)
32
+ util = Util('resample')
33
+ util.debug(f'running {expr.name} from config {config_file}, nkululeko version {VERSION}')
34
+
35
+ if util.config_val('EXP', 'no_warnings', False):
36
+ import warnings
37
+ warnings.filterwarnings("ignore")
38
+
39
+ # load the data
40
+ expr.load_datasets()
41
+
42
+ # split into train and test
43
+ expr.fill_train_and_tests()
44
+ util.debug(f'train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}')
45
+
46
+ sample_selection = util.config_val('RESAMPLE', 'sample_selection', 'all')
47
+ if sample_selection=='all':
48
+ df = pd.concat([expr.df_train, expr.df_test])
49
+ elif sample_selection=='train':
50
+ df = expr.df_train
51
+ elif sample_selection=='test':
52
+ df = expr.df_test
53
+ else:
54
+ util.error(f'unknown selection specifier {sample_selection}, should be [all | train | test]')
55
+ util.debug(f'resampling {sample_selection}: {df.shape[0]} samples')
56
+ rs = Resampler(df)
57
+ rs.resample()
58
+ print('DONE')
59
+
60
+
61
+ if __name__ == "__main__":
62
+ cwd = os.path.dirname(os.path.abspath(__file__))
63
+ main(cwd) # use this if you want to state the config file path on command line
nkululeko/segment.py CHANGED
@@ -44,16 +44,6 @@ def main(src_dir):
44
44
 
45
45
  # segment
46
46
  segment_target = util.config_val('SEGMENT', 'target', '_seg')
47
- # this if a specific dataset is to be segmented
48
- # segment_db = util.config_val('DATA', 'segment', False)
49
- # if segment_db:
50
- # for dataset in expr.datasets.keys:
51
- # if segment_db == dataset:
52
- # df = expr.datasets[dataset].df
53
- # util.debug(f'segmenting {dataset}')
54
- # df_seg = segment_dataframe(df)
55
- # name = f'{dataset}{segment_target}'
56
- # df_seg.to_csv(f'{expr.data_dir}/{name}.csv')
57
47
 
58
48
  segmenter = util.config_val('SEGMENT', 'method', 'silero')
59
49
  sample_selection = util.config_val('SEGMENT', 'sample_selection', 'all')
@@ -65,8 +55,6 @@ def main(src_dir):
65
55
  df = expr.df_test
66
56
  else:
67
57
  util.error(f'unknown segmentation selection specifier {sample_selection}, should be [all | train | test]')
68
- # if "duration" not in df.columns:
69
- # df = df.drop(columns=['duration'], inplace=True)
70
58
  util.debug(f'segmenting {sample_selection}: {df.shape[0]} samples with {segmenter}')
71
59
  if segmenter=='silero':
72
60
  from nkululeko.segmenting.seg_silero import Silero_segmenter
@@ -86,14 +74,21 @@ def main(src_dir):
86
74
  df['duration'] = df.index.to_series().map(lambda x:calc_dur(x))
87
75
  num_before = df.shape[0]
88
76
  num_after = df_seg.shape[0]
89
- dataname = '_'.join(expr.datasets.keys())
90
- name = f'{dataname}{segment_target}'
91
- df_seg.to_csv(f'{expr.data_dir}/{name}.csv')
77
+ # plot distributions
92
78
  from nkululeko.plots import Plots
93
79
  plots = Plots()
94
80
  plots.plot_durations(df, 'original_durations', sample_selection)
95
81
  plots.plot_durations(df_seg, 'segmented_durations', sample_selection)
96
82
  print('')
83
+ # remove encoded labels
84
+ target = util.config_val('DATA', 'target', 'emotion')
85
+ if 'class_label' in df_seg.columns:
86
+ df_seg = df_seg.drop(columns=[target])
87
+ df_seg = df_seg.rename(columns={'class_label':target})
88
+ # save file
89
+ dataname = '_'.join(expr.datasets.keys())
90
+ name = f'{dataname}{segment_target}'
91
+ df_seg.to_csv(f'{expr.data_dir}/{name}.csv')
97
92
  util.debug(f'saved {name}.csv to {expr.data_dir}, {num_after} samples (was {num_before})')
98
93
  print('DONE')
99
94
 
nkululeko/util.py CHANGED
@@ -2,7 +2,6 @@
2
2
  import audeer
3
3
  import ast
4
4
  import sys
5
- import nkululeko.glob_conf as glob_conf
6
5
  import numpy as np
7
6
  import os.path
8
7
  import configparser
@@ -11,16 +10,20 @@ import pandas as pd
11
10
 
12
11
  class Util:
13
12
 
13
+ # a list of default values that do not trigger a "using default" message when they are used
14
14
  stopvals = [False, 'False', 'classification', 'png']
15
15
 
16
- def __init__(self, caller=None):
17
- self.got_data_roots = self.config_val('DATA', 'root_folders', False)
18
- if self.got_data_roots:
19
- # if there is a global data rootfolder file, read from there
20
- if not os.path.isfile(self.got_data_roots):
21
- self.error(f'no such file: {self.got_data_roots}')
22
- self.data_roots = configparser.ConfigParser()
23
- self.data_roots.read(self.got_data_roots)
16
+ def __init__(self, caller=None, has_config=True):
17
+ if has_config:
18
+ import nkululeko.glob_conf as glob_conf
19
+ self.config = glob_conf.config
20
+ self.got_data_roots = self.config_val('DATA', 'root_folders', False)
21
+ if self.got_data_roots:
22
+ # if there is a global data rootfolder file, read from there
23
+ if not os.path.isfile(self.got_data_roots):
24
+ self.error(f'no such file: {self.got_data_roots}')
25
+ self.data_roots = configparser.ConfigParser()
26
+ self.data_roots.read(self.got_data_roots)
24
27
  if caller is not None:
25
28
  self.caller = caller
26
29
  else:
@@ -30,10 +33,10 @@ class Util:
30
33
  """
31
34
  This method allows the user to get the directory path for the given argument.
32
35
  """
33
- root = glob_conf.config['EXP']['root']
34
- name = glob_conf.config['EXP']['name']
36
+ root = self.config['EXP']['root']
37
+ name = self.config['EXP']['name']
35
38
  try:
36
- entryn = glob_conf.config['EXP'][entry]
39
+ entryn = self.config['EXP'][entry]
37
40
  except KeyError:
38
41
  # some default values
39
42
  if entry == 'fig_dir':
@@ -61,7 +64,7 @@ class Util:
61
64
  If the value is present in the experiment configuration it will be used, else
62
65
  we look in a global file specified by the root_folders value.
63
66
  """
64
- configuration = glob_conf.config
67
+ configuration = self.config
65
68
  try:
66
69
  if len(key)>0:
67
70
  return configuration['DATA'][dataset+'.'+key]
@@ -95,8 +98,8 @@ class Util:
95
98
  """
96
99
  Get the experiment directory
97
100
  """
98
- root = glob_conf.config['EXP']['root']
99
- name = glob_conf.config['EXP']['name']
101
+ root = self.config['EXP']['root']
102
+ name = self.config['EXP']['name']
100
103
  dir_name = f'{root}{name}'
101
104
  audeer.mkdir(dir_name)
102
105
  return dir_name
@@ -124,19 +127,19 @@ class Util:
124
127
  """
125
128
  Get a string as name from all databases that are used
126
129
  """
127
- return '_'.join(ast.literal_eval(glob_conf.config['DATA']['databases']))
130
+ return '_'.join(ast.literal_eval(self.config['DATA']['databases']))
128
131
 
129
132
  def get_exp_name(self, only_train = False, only_data = False):
130
133
  trains_val = self.config_val('DATA', 'trains', False)
131
134
  if only_train and trains_val:
132
135
  # try to get only the train tables
133
- ds = '_'.join(ast.literal_eval(glob_conf.config['DATA']['trains']))
136
+ ds = '_'.join(ast.literal_eval(self.config['DATA']['trains']))
134
137
  else:
135
- ds = '_'.join(ast.literal_eval(glob_conf.config['DATA']['databases']))
138
+ ds = '_'.join(ast.literal_eval(self.config['DATA']['databases']))
136
139
  mt = ''
137
140
  if not only_data:
138
- mt = f'_{glob_conf.config["MODEL"]["type"]}'
139
- ft = '_'.join(ast.literal_eval(glob_conf.config['FEATS']['type']))
141
+ mt = f'_{self.config["MODEL"]["type"]}'
142
+ ft = '_'.join(ast.literal_eval(self.config['FEATS']['type']))
140
143
  ft += '_'
141
144
  set = self.config_val('FEATS', 'set', False)
142
145
  set_string = ''
@@ -159,7 +162,7 @@ class Util:
159
162
 
160
163
  def get_plot_name(self):
161
164
  try:
162
- plot_name = glob_conf.config['PLOT']['name']
165
+ plot_name = self.config['PLOT']['name']
163
166
  except KeyError:
164
167
  plot_name = self.get_exp_name()
165
168
  return plot_name
@@ -183,10 +186,10 @@ class Util:
183
186
  def set_config_val(self, section, key, value):
184
187
  try:
185
188
  # does the section already exists?
186
- glob_conf.config[section][key] = str(value)
189
+ self.config[section][key] = str(value)
187
190
  except KeyError:
188
- glob_conf.config.add_section(section)
189
- glob_conf.config[section][key] = str(value)
191
+ self.config.add_section(section)
192
+ self.config[section][key] = str(value)
190
193
 
191
194
  def check_df(self, i, df):
192
195
  """Check a dataframe"""
@@ -195,7 +198,7 @@ class Util:
195
198
  )
196
199
  def config_val(self, section, key, default):
197
200
  try:
198
- return glob_conf.config[section][key]
201
+ return self.config[section][key]
199
202
  except KeyError:
200
203
  if not default in self.stopvals:
201
204
  self.debug(f'value for {key} not found, using default: {default}')
@@ -203,7 +206,7 @@ class Util:
203
206
 
204
207
  def config_val_list(self, section, key, default):
205
208
  try:
206
- return ast.literal_eval(glob_conf.config[section][key])
209
+ return ast.literal_eval(self.config[section][key])
207
210
  except KeyError:
208
211
  if not default in self.stopvals:
209
212
  self.debug(f'value for {key} not found, using default: {default}')
@@ -213,11 +216,11 @@ class Util:
213
216
  # try:
214
217
  # labels = glob_conf.label_encoder.classes_
215
218
  # except AttributeError:
216
- labels = ast.literal_eval(glob_conf.config['DATA']['labels'])
219
+ labels = ast.literal_eval(self.config['DATA']['labels'])
217
220
  return labels
218
221
 
219
222
  def continuous_to_categorical(self, array):
220
- bins = ast.literal_eval(glob_conf.config['DATA']['bins'])
223
+ bins = ast.literal_eval(self.config['DATA']['bins'])
221
224
  result = np.digitize(array, bins)-1
222
225
  return result
223
226
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.61.0
3
+ Version: 0.62.0
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -253,6 +253,13 @@ Nkululeko can be used under the [MIT license](https://choosealicense.com/license
253
253
  Changelog
254
254
  =========
255
255
 
256
+ Version 0.62.0
257
+ --------------
258
+ * fixed segment silero bug
259
+ * added all Wav2vec2 models
260
+ * added resampler module
261
+ * added error on file for embeddings
262
+
256
263
  Version 0.61.0
257
264
  --------------
258
265
  * added HUBERT embeddings
@@ -2,12 +2,12 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/augment.py,sha256=ebv5QebGD8wLzXInvusjn4kFlET6-yXkYoF132BrubQ,1750
3
3
  nkululeko/balancer.py,sha256=64ftZN68sMDfkvuovCDHpAHmSJgCO6Kdk9bwmpSisec,12
4
4
  nkululeko/cacheddataset.py,sha256=bSJ_SDg7TxL89YL_pJXp-sFvdUXJtHuBTd5KSTE4AkQ,955
5
- nkululeko/constants.py,sha256=xSkfM3CWCYGD40D6GhHHCrxrw0JNK-372YdiRKMknWs,18
5
+ nkululeko/constants.py,sha256=7u8AMX9auXDDDSpQnfb8--c2xIvy8IAsVaxENC-wi0k,18
6
6
  nkululeko/demo.py,sha256=nGP3fUDXuW1ZF12AzMpzRWXct0rdqYRJVNgA9B_QWwA,1821
7
7
  nkululeko/demo_predictor.py,sha256=VVxE2lf5lTkAP5qElG5U2bK6SdDzQ2Jmf0Vn_yHpSro,2302
8
8
  nkululeko/experiment.py,sha256=9qStgy31svY4bBVZOkuJ0JFjEQ1sIT2ibIdJ6IVlfTI,25063
9
9
  nkululeko/explore.py,sha256=aemOk5XYw7axQEJQfdABEUxN3img0NYSb8l6W-nDzZY,2090
10
- nkululeko/feature_extractor.py,sha256=2LqPIiDAoaBRhjcKik2hjBEBVBsLLxx8blQvTD43TRg,6324
10
+ nkululeko/feature_extractor.py,sha256=h2kMJR6fElshCUXw_A6fg3gNbKRMXrZR4SGhaaQI4wk,6274
11
11
  nkululeko/file_checker.py,sha256=Nw05SIp7Ez1U9ZeFhNGz0XivwKr43hHg1WsfzKsrFPQ,3510
12
12
  nkululeko/filter_data.py,sha256=g7giEShbA-dr2ekVycW5WurFG-UaopJvDZWylKNZtpM,6717
13
13
  nkululeko/glob_conf.py,sha256=2Tl0NZQeVeydDO8T2tuJC2lCv99wvaTVRX9Dy1Mq_L4,237
@@ -16,16 +16,17 @@ nkululeko/nkululeko.py,sha256=O2Zw7u-Mb7VP9MPxAlhdTkXV2lW2kETIuSJp7mfj_Tc,1671
16
16
  nkululeko/plots.py,sha256=hoOFLbWXpV5jGDWHEpy345_4vpaGKGMAv2JwvpNUxkw,11454
17
17
  nkululeko/predict.py,sha256=3ei4wn2by0p9Vkv7cllMcszmEjSM2vX0T6x_5rlgT28,1851
18
18
  nkululeko/reporter.py,sha256=359aeQWt0ZGLseaJnOfafYG8BrwumiM2Q58DWiaoyWQ,10177
19
+ nkululeko/resample.py,sha256=392zQEpRaWGwBvRBMXY9j2WtTTEaELk8HRPQ6ajzJSQ,2121
19
20
  nkululeko/result.py,sha256=Ey5FPsAyfnQVtzO_J6_4hkOAZ191YWmF_vXxlgNjCdc,406
20
21
  nkululeko/runmanager.py,sha256=ll04dEu5Y1nOi8QOtmSiw3oxzcXeASdQsg0t-vxCny8,6765
21
22
  nkululeko/scaler.py,sha256=6NQHbSQZO9HIfhYNlliuDRywjaEH_FVKHRskTJ2xr90,3021
22
- nkululeko/segment.py,sha256=GGyovnZ75Sqd8TgBH5fi3fjRkVw_ygqBQD46Yn6GVQ4,4660
23
+ nkululeko/segment.py,sha256=YcrSQ_iP80inPZbXN4jFpKbMmGxI_Xcev5IfK0GSdJ0,4349
23
24
  nkululeko/syllable_nuclei.py,sha256=vK9dj5deqRnyEmlZmhFtKPzqKVGNCgTnWaG8UDITKNg,9913
24
25
  nkululeko/test.py,sha256=BbHGliDChAXqMe2oA579dJpyZSlPGAm5997lX_POboQ,1372
25
26
  nkululeko/test_predictor.py,sha256=QwdAVPHNew9w5PD_sPFhhWVDTYRAbUE6fkAp58X8Hjg,2410
26
- nkululeko/util.py,sha256=gZrNTF4C7hKkEMCC_hoNkEAhAViWzWebP8LsHRew7s4,9731
27
- nkululeko-0.61.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
28
- nkululeko-0.61.0.dist-info/METADATA,sha256=LXJjW9KpGkPum60eGuYW__gl5QKXVRhnm6RySrKo2b8,21680
29
- nkululeko-0.61.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
30
- nkululeko-0.61.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
31
- nkululeko-0.61.0.dist-info/RECORD,,
27
+ nkululeko/util.py,sha256=VE8HbcdUkfHbo1xQNVznyemoE-OVeWgDwsjbZpEBqUA,9840
28
+ nkululeko-0.62.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
29
+ nkululeko-0.62.0.dist-info/METADATA,sha256=lQogxZd1eFD1PEKzwujzUSHQU3sju2IY3v7QQM0ddLE,21828
30
+ nkululeko-0.62.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
31
+ nkululeko-0.62.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
32
+ nkululeko-0.62.0.dist-info/RECORD,,