PyPI - nkululeko - Versions diffs - 0.61.0__py3-none-any.whl → 0.62.0__py3-none-any.whl - Mend

nkululeko 0.61.0__py3-none-any.whl → 0.62.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

nkululeko/constants.py +1 -1
nkululeko/feature_extractor.py +4 -4
nkululeko/resample.py +63 -0
nkululeko/segment.py +10 -15
nkululeko/util.py +31 -28
{nkululeko-0.61.0.dist-info → nkululeko-0.62.0.dist-info}/METADATA +8 -1
{nkululeko-0.61.0.dist-info → nkululeko-0.62.0.dist-info}/RECORD +10 -9
{nkululeko-0.61.0.dist-info → nkululeko-0.62.0.dist-info}/LICENSE +0 -0
{nkululeko-0.61.0.dist-info → nkululeko-0.62.0.dist-info}/WHEEL +0 -0
{nkululeko-0.61.0.dist-info → nkululeko-0.62.0.dist-info}/top_level.txt +0 -0

nkululeko/constants.py CHANGED Viewed

	@@ -1 +1 @@
1	- VERSION = '0.61.0'
1	+ VERSION = '0.62.0'

nkululeko/feature_extractor.py CHANGED Viewed

@@ -50,13 +50,13 @@ class FeatureExtractor:
                 self.featExtractor = TRILLset(
                     f"{store_name}_{self.feats_designation}", self.data_df
                 )
-            elif feats_type == "wav2vec":
+            elif feats_type.startswith("wav2vec2"):
                 from nkululeko.feat_extract.feats_wav2vec2 import Wav2vec2
                 self.featExtractor = Wav2vec2(
-                    f"{store_name}_{self.feats_designation}", self.data_df
+                    f"{store_name}_{self.feats_designation}", self.data_df,
+                    feats_type
                 )
-            elif feats_type in ("hubert", "hubert_ft", "hubert_large",
-                                "hubert_xlarge", "hubert_xlarge_ft"):
+            elif feats_type.startswith("hubert"):
                 from nkululeko.feat_extract.feats_hubert import Hubert
                 self.featExtractor = Hubert(
                     f"{store_name}_{self.feats_designation}", self.data_df,

nkululeko/resample.py ADDED Viewed

@@ -0,0 +1,63 @@
+# resample.py
+# change the sampling rate for train and test splits
+from nkululeko.experiment import Experiment
+import configparser
+from nkululeko.util import Util
+from  nkululeko.constants import VERSION
+import argparse
+import os
+import pandas as pd
+from nkululeko.augmenting .resampler import Resampler
+def main(src_dir):
+    parser = argparse.ArgumentParser(description='Call the nkululeko framework.')
+    parser.add_argument('--config', default='exp.ini', help='The base configuration')
+    args = parser.parse_args()
+    if args.config is not None:
+        config_file = args.config
+    else:
+        config_file = f'{src_dir}/exp.ini'
+    # test if the configuration file exists
+    if not os.path.isfile(config_file):
+        print(f'ERROR: no such file: {config_file}')
+        exit()
+    # load one configuration per experiment
+    config = configparser.ConfigParser()
+    config.read(config_file)
+    # create a new experiment
+    expr = Experiment(config)
+    util = Util('resample')
+    util.debug(f'running {expr.name} from config {config_file}, nkululeko version {VERSION}')
+    if util.config_val('EXP', 'no_warnings', False):
+        import warnings
+        warnings.filterwarnings("ignore")
+    # load the data
+    expr.load_datasets()
+    # split into train and test
+    expr.fill_train_and_tests()
+    util.debug(f'train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}')
+    sample_selection = util.config_val('RESAMPLE', 'sample_selection', 'all')
+    if sample_selection=='all':
+        df = pd.concat([expr.df_train, expr.df_test])
+    elif sample_selection=='train':
+        df = expr.df_train
+    elif sample_selection=='test':
+        df = expr.df_test
+    else:
+        util.error(f'unknown selection specifier {sample_selection}, should be [all | train | test]')
+    util.debug(f'resampling {sample_selection}: {df.shape[0]} samples')
+    rs = Resampler(df)
+    rs.resample()
+    print('DONE')
+if __name__ == "__main__":
+    cwd = os.path.dirname(os.path.abspath(__file__))
+    main(cwd) # use this if you want to state the config file path on command line

nkululeko/segment.py CHANGED Viewed

@@ -44,16 +44,6 @@ def main(src_dir):
     # segment
     segment_target = util.config_val('SEGMENT', 'target', '_seg')
-    # this if a specific dataset is to be segmented
-    # segment_db = util.config_val('DATA', 'segment', False)
-    # if segment_db:
-    #     for dataset in expr.datasets.keys:
-    #         if segment_db == dataset:
-    #             df = expr.datasets[dataset].df
-    #             util.debug(f'segmenting {dataset}')
-    #             df_seg = segment_dataframe(df)
-    #             name = f'{dataset}{segment_target}'
-    #             df_seg.to_csv(f'{expr.data_dir}/{name}.csv')
     segmenter = util.config_val('SEGMENT', 'method', 'silero')
     sample_selection = util.config_val('SEGMENT', 'sample_selection', 'all')
@@ -65,8 +55,6 @@ def main(src_dir):
         df = expr.df_test
     else:
         util.error(f'unknown segmentation selection specifier {sample_selection}, should be [all | train | test]')
-    # if "duration" not in df.columns:
-    #     df = df.drop(columns=['duration'], inplace=True)
     util.debug(f'segmenting {sample_selection}: {df.shape[0]} samples with {segmenter}')
     if segmenter=='silero':
         from nkululeko.segmenting.seg_silero import Silero_segmenter
@@ -86,14 +74,21 @@ def main(src_dir):
         df['duration'] = df.index.to_series().map(lambda x:calc_dur(x))
     num_before = df.shape[0]
     num_after = df_seg.shape[0]
-    dataname = '_'.join(expr.datasets.keys())
-    name = f'{dataname}{segment_target}'
-    df_seg.to_csv(f'{expr.data_dir}/{name}.csv')
+    # plot distributions
     from nkululeko.plots import Plots
     plots = Plots()
     plots.plot_durations(df, 'original_durations', sample_selection)
     plots.plot_durations(df_seg, 'segmented_durations', sample_selection)
     print('')
+    # remove encoded labels
+    target = util.config_val('DATA', 'target', 'emotion')
+    if 'class_label' in df_seg.columns:
+        df_seg = df_seg.drop(columns=[target])
+        df_seg = df_seg.rename(columns={'class_label':target})
+    # save file
+    dataname = '_'.join(expr.datasets.keys())
+    name = f'{dataname}{segment_target}'
+    df_seg.to_csv(f'{expr.data_dir}/{name}.csv')
     util.debug(f'saved {name}.csv to {expr.data_dir}, {num_after} samples (was {num_before})')
     print('DONE')

nkululeko/util.py CHANGED Viewed

@@ -2,7 +2,6 @@
 import audeer
 import ast
 import sys
-import nkululeko.glob_conf as glob_conf
 import numpy as np
 import os.path
 import configparser
@@ -11,16 +10,20 @@ import pandas as pd
 class Util:
+    # a list of words that need not to be warned upon if default values are used
     stopvals = [False, 'False', 'classification', 'png']
-    def __init__(self, caller=None):
-        self.got_data_roots = self.config_val('DATA', 'root_folders', False)
-        if self.got_data_roots:
-            # if there is a global data rootfolder file, read from there
-            if not os.path.isfile(self.got_data_roots):
-                self.error(f'no such file: {self.got_data_roots}')
-            self.data_roots = configparser.ConfigParser()
-            self.data_roots.read(self.got_data_roots)
+    def __init__(self, caller=None, has_config=True):
+        if has_config:
+            import nkululeko.glob_conf as glob_conf
+            self.config = glob_conf.config
+            self.got_data_roots = self.config_val('DATA', 'root_folders', False)
+            if self.got_data_roots:
+                # if there is a global data rootfolder file, read from there
+                if not os.path.isfile(self.got_data_roots):
+                    self.error(f'no such file: {self.got_data_roots}')
+                self.data_roots = configparser.ConfigParser()
+                self.data_roots.read(self.got_data_roots)
         if caller is not None:
             self.caller = caller
         else:
@@ -30,10 +33,10 @@ class Util:
         """
         This method allows the user to get the directory path for the given argument.
         """
-        root = glob_conf.config['EXP']['root']
-        name = glob_conf.config['EXP']['name']
+        root = self.config['EXP']['root']
+        name = self.config['EXP']['name']
         try:
-            entryn = glob_conf.config['EXP'][entry]
+            entryn = self.config['EXP'][entry]
         except KeyError:
             # some default values
             if entry == 'fig_dir':
@@ -61,7 +64,7 @@ class Util:
         If the value is present in the experiment configuration it will be used, else
         we look in a global file specified by the root_folders value.
         """
-        configuration = glob_conf.config
+        configuration = self.config
         try:
             if len(key)>0:
                 return configuration['DATA'][dataset+'.'+key]
@@ -95,8 +98,8 @@ class Util:
         """
             Get the experiment directory
         """
-        root = glob_conf.config['EXP']['root']
-        name = glob_conf.config['EXP']['name']
+        root = self.config['EXP']['root']
+        name = self.config['EXP']['name']
         dir_name = f'{root}{name}'
         audeer.mkdir(dir_name)
         return dir_name
@@ -124,19 +127,19 @@ class Util:
         """
             Get a string as name from all databases that are useed
         """
-        return '_'.join(ast.literal_eval(glob_conf.config['DATA']['databases']))
+        return '_'.join(ast.literal_eval(self.config['DATA']['databases']))
     def get_exp_name(self, only_train = False, only_data = False):
         trains_val = self.config_val('DATA', 'trains', False)
         if only_train and trains_val:
             # try to get only the train tables
-            ds = '_'.join(ast.literal_eval(glob_conf.config['DATA']['trains']))
+            ds = '_'.join(ast.literal_eval(self.config['DATA']['trains']))
         else:
-            ds = '_'.join(ast.literal_eval(glob_conf.config['DATA']['databases']))
+            ds = '_'.join(ast.literal_eval(self.config['DATA']['databases']))
         mt = ''
         if not only_data:
-            mt = f'_{glob_conf.config["MODEL"]["type"]}'
-        ft = '_'.join(ast.literal_eval(glob_conf.config['FEATS']['type']))
+            mt = f'_{self.config["MODEL"]["type"]}'
+        ft = '_'.join(ast.literal_eval(self.config['FEATS']['type']))
         ft += '_'
         set = self.config_val('FEATS', 'set', False)
         set_string = ''
@@ -159,7 +162,7 @@ class Util:
     def get_plot_name(self):
         try:
-            plot_name = glob_conf.config['PLOT']['name']
+            plot_name = self.config['PLOT']['name']
         except KeyError:
             plot_name = self.get_exp_name()
         return plot_name
@@ -183,10 +186,10 @@ class Util:
     def set_config_val(self, section, key, value):
         try:
             # does the section already exists?
-            glob_conf.config[section][key] = str(value)
+            self.config[section][key] = str(value)
         except KeyError:
-            glob_conf.config.add_section(section)
-            glob_conf.config[section][key] = str(value)
+            self.config.add_section(section)
+            self.config[section][key] = str(value)
     def check_df(self, i, df):
         """Check a dataframe"""
@@ -195,7 +198,7 @@ class Util:
         )
     def config_val(self, section, key, default):
         try:
-            return glob_conf.config[section][key]
+            return self.config[section][key]
         except KeyError:
             if not default in self.stopvals:
                 self.debug(f'value for {key} not found, using default: {default}')
@@ -203,7 +206,7 @@ class Util:
     def config_val_list(self, section, key, default):
         try:
-            return ast.literal_eval(glob_conf.config[section][key])
+            return ast.literal_eval(self.config[section][key])
         except KeyError:
             if not default in self.stopvals:
                 self.debug(f'value for {key} not found, using default: {default}')
@@ -213,11 +216,11 @@ class Util:
         # try:
         #     labels = glob_conf.label_encoder.classes_
         # except AttributeError:
-        labels = ast.literal_eval(glob_conf.config['DATA']['labels'])
+        labels = ast.literal_eval(self.config['DATA']['labels'])
         return labels
     def continuous_to_categorical(self, array):
-        bins = ast.literal_eval(glob_conf.config['DATA']['bins'])
+        bins = ast.literal_eval(self.config['DATA']['bins'])
         result =  np.digitize(array, bins)-1
         return result

{nkululeko-0.61.0.dist-info → nkululeko-0.62.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.61.0
+Version: 0.62.0
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -253,6 +253,13 @@ Nkululeko can be used under the [MIT license](https://choosealicense.com/license
 Changelog
 =========
+Version 0.62.0
+--------------
+* fixed segment silero bug
+* added all Wav2vec2 models
+* added resampler module
+* added error on file for embeddings
 Version 0.61.0
 --------------
 * added HUBERT embeddings

{nkululeko-0.61.0.dist-info → nkululeko-0.62.0.dist-info}/RECORD RENAMED Viewed

@@ -2,12 +2,12 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/augment.py,sha256=ebv5QebGD8wLzXInvusjn4kFlET6-yXkYoF132BrubQ,1750
 nkululeko/balancer.py,sha256=64ftZN68sMDfkvuovCDHpAHmSJgCO6Kdk9bwmpSisec,12
 nkululeko/cacheddataset.py,sha256=bSJ_SDg7TxL89YL_pJXp-sFvdUXJtHuBTd5KSTE4AkQ,955
-nkululeko/constants.py,sha256=xSkfM3CWCYGD40D6GhHHCrxrw0JNK-372YdiRKMknWs,18
+nkululeko/constants.py,sha256=7u8AMX9auXDDDSpQnfb8--c2xIvy8IAsVaxENC-wi0k,18
 nkululeko/demo.py,sha256=nGP3fUDXuW1ZF12AzMpzRWXct0rdqYRJVNgA9B_QWwA,1821
 nkululeko/demo_predictor.py,sha256=VVxE2lf5lTkAP5qElG5U2bK6SdDzQ2Jmf0Vn_yHpSro,2302
 nkululeko/experiment.py,sha256=9qStgy31svY4bBVZOkuJ0JFjEQ1sIT2ibIdJ6IVlfTI,25063
 nkululeko/explore.py,sha256=aemOk5XYw7axQEJQfdABEUxN3img0NYSb8l6W-nDzZY,2090
-nkululeko/feature_extractor.py,sha256=2LqPIiDAoaBRhjcKik2hjBEBVBsLLxx8blQvTD43TRg,6324
+nkululeko/feature_extractor.py,sha256=h2kMJR6fElshCUXw_A6fg3gNbKRMXrZR4SGhaaQI4wk,6274
 nkululeko/file_checker.py,sha256=Nw05SIp7Ez1U9ZeFhNGz0XivwKr43hHg1WsfzKsrFPQ,3510
 nkululeko/filter_data.py,sha256=g7giEShbA-dr2ekVycW5WurFG-UaopJvDZWylKNZtpM,6717
 nkululeko/glob_conf.py,sha256=2Tl0NZQeVeydDO8T2tuJC2lCv99wvaTVRX9Dy1Mq_L4,237
@@ -16,16 +16,17 @@ nkululeko/nkululeko.py,sha256=O2Zw7u-Mb7VP9MPxAlhdTkXV2lW2kETIuSJp7mfj_Tc,1671
 nkululeko/plots.py,sha256=hoOFLbWXpV5jGDWHEpy345_4vpaGKGMAv2JwvpNUxkw,11454
 nkululeko/predict.py,sha256=3ei4wn2by0p9Vkv7cllMcszmEjSM2vX0T6x_5rlgT28,1851
 nkululeko/reporter.py,sha256=359aeQWt0ZGLseaJnOfafYG8BrwumiM2Q58DWiaoyWQ,10177
+nkululeko/resample.py,sha256=392zQEpRaWGwBvRBMXY9j2WtTTEaELk8HRPQ6ajzJSQ,2121
 nkululeko/result.py,sha256=Ey5FPsAyfnQVtzO_J6_4hkOAZ191YWmF_vXxlgNjCdc,406
 nkululeko/runmanager.py,sha256=ll04dEu5Y1nOi8QOtmSiw3oxzcXeASdQsg0t-vxCny8,6765
 nkululeko/scaler.py,sha256=6NQHbSQZO9HIfhYNlliuDRywjaEH_FVKHRskTJ2xr90,3021
-nkululeko/segment.py,sha256=GGyovnZ75Sqd8TgBH5fi3fjRkVw_ygqBQD46Yn6GVQ4,4660
+nkululeko/segment.py,sha256=YcrSQ_iP80inPZbXN4jFpKbMmGxI_Xcev5IfK0GSdJ0,4349
 nkululeko/syllable_nuclei.py,sha256=vK9dj5deqRnyEmlZmhFtKPzqKVGNCgTnWaG8UDITKNg,9913
 nkululeko/test.py,sha256=BbHGliDChAXqMe2oA579dJpyZSlPGAm5997lX_POboQ,1372
 nkululeko/test_predictor.py,sha256=QwdAVPHNew9w5PD_sPFhhWVDTYRAbUE6fkAp58X8Hjg,2410
-nkululeko/util.py,sha256=gZrNTF4C7hKkEMCC_hoNkEAhAViWzWebP8LsHRew7s4,9731
-nkululeko-0.61.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
-nkululeko-0.61.0.dist-info/METADATA,sha256=LXJjW9KpGkPum60eGuYW__gl5QKXVRhnm6RySrKo2b8,21680
-nkululeko-0.61.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
-nkululeko-0.61.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
-nkululeko-0.61.0.dist-info/RECORD,,
+nkululeko/util.py,sha256=VE8HbcdUkfHbo1xQNVznyemoE-OVeWgDwsjbZpEBqUA,9840
+nkululeko-0.62.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.62.0.dist-info/METADATA,sha256=lQogxZd1eFD1PEKzwujzUSHQU3sju2IY3v7QQM0ddLE,21828
+nkululeko-0.62.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
+nkululeko-0.62.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.62.0.dist-info/RECORD,,

{nkululeko-0.61.0.dist-info → nkululeko-0.62.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{nkululeko-0.61.0.dist-info → nkululeko-0.62.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{nkululeko-0.61.0.dist-info → nkululeko-0.62.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

nkululeko 0.61.0__py3-none-any.whl → 0.62.0__py3-none-any.whl

nkululeko 0.61.0__py3-none-any.whl → 0.62.0__py3-none-any.whl