PyPI - nkululeko - Versions diffs - 0.45.0__tar.gz → 0.45.2__tar.gz - Mend

nkululeko 0.45.0.tar.gz → 0.45.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

{nkululeko-0.45.0 → nkululeko-0.45.2}/CHANGELOG.md RENAMED Viewed

@@ -6,6 +6,13 @@ Version 0.44.1
 * bugfixing: feature importance: https://github.com/felixbur/nkululeko/issues/23
 * bugfixing: loading csv database with filewise index https://github.com/felixbur/nkululeko/issues/24
+Version 0.45.2
+--------------
+* added sample_selection for sample distribution plots
+Version 0.45.1
+--------------
+* fixed dataframe.append bug
 Version 0.45.0
 --------------

{nkululeko-0.45.0/nkululeko.egg-info → nkululeko-0.45.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.45.0
+Version: 0.45.2
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -123,7 +123,6 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
 * [Combine feature sets](http://blog.syntheticspeech.de/2022/06/30/how-to-combine-feature-sets-with-nkululeko/)
 * [Classifying continuous variables](http://blog.syntheticspeech.de/2022/01/26/nkululeko-classifying-continuous-variables/)
 * [Try out / demo a trained model](http://blog.syntheticspeech.de/2022/01/24/nkululeko-try-out-demo-a-trained-model/)
-* [Plot distributions of feature values](http://blog.syntheticspeech.de/2023/02/16/nkululeko-how-to-plot-distributions-of-feature-values/)
 * [Perform cross database experiments](http://blog.syntheticspeech.de/2021/10/05/nkululeko-perform-cross-database-experiments/)
 * [Meta parameter optimization](http://blog.syntheticspeech.de/2021/09/03/perform-optimization-with-nkululeko/)
 * [How to set up wav2vec embedding](http://blog.syntheticspeech.de/2021/12/03/how-to-set-up-wav2vec-embedding-for-nkululeko/)
@@ -217,6 +216,13 @@ Version 0.44.1
 * bugfixing: feature importance: https://github.com/felixbur/nkululeko/issues/23
 * bugfixing: loading csv database with filewise index https://github.com/felixbur/nkululeko/issues/24
+Version 0.45.2
+--------------
+* added sample_selection for sample distribution plots
+Version 0.45.1
+--------------
+* fixed dataframe.append bug
 Version 0.45.0
 --------------

{nkululeko-0.45.0 → nkululeko-0.45.2}/README.md RENAMED Viewed

@@ -107,7 +107,6 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
 * [Combine feature sets](http://blog.syntheticspeech.de/2022/06/30/how-to-combine-feature-sets-with-nkululeko/)
 * [Classifying continuous variables](http://blog.syntheticspeech.de/2022/01/26/nkululeko-classifying-continuous-variables/)
 * [Try out / demo a trained model](http://blog.syntheticspeech.de/2022/01/24/nkululeko-try-out-demo-a-trained-model/)
-* [Plot distributions of feature values](http://blog.syntheticspeech.de/2023/02/16/nkululeko-how-to-plot-distributions-of-feature-values/)
 * [Perform cross database experiments](http://blog.syntheticspeech.de/2021/10/05/nkululeko-perform-cross-database-experiments/)
 * [Meta parameter optimization](http://blog.syntheticspeech.de/2021/09/03/perform-optimization-with-nkululeko/)
 * [How to set up wav2vec embedding](http://blog.syntheticspeech.de/2021/12/03/how-to-set-up-wav2vec-embedding-for-nkululeko/)

nkululeko-0.45.2/nkululeko/constants.py ADDED Viewed

@@ -0,0 +1 @@
+VERSION = '0.45.2'

{nkululeko-0.45.0 → nkululeko-0.45.2}/nkululeko/experiment.py RENAMED Viewed

@@ -247,17 +247,20 @@ class Experiment:
     def plot_distribution(self):
         """Plot the distribution of samples and speaker per target class and biological sex"""
         plot = Plots()
-        if self.util.exp_is_classification():
-            # self.df_train['labels'] = self.label_encoder.inverse_transform(self.df_train[self.target])
-            # if self.df_test.is_labeled:
-            #     self.df_test['labels'] = self.label_encoder.inverse_transform(self.df_test[self.target])
-            if self.df_test.shape[0] > 0:
-                plot.describe_df('dev_set', self.df_test, self.target, f'test_distplot')
-            plot.describe_df('train_set', self.df_train, self.target, f'train_distplot')
+        sample_selection = self.util.config_val('EXPL', 'sample_selection', 'all')
+        if sample_selection=='all':
+            df_labels = pd.concat([self.df_train, self.df_test])
+            self.util.copy_flags(self.df_train, df_labels)
+        elif sample_selection=='train':
+            df_labels = self.df_train
+            self.util.copy_flags(self.df_train, df_labels)
+        elif sample_selection=='test':
+            df_labels = self.df_test
+            self.util.copy_flags(self.df_test, df_labels)
         else:
-            if self.df_test.shape[0] > 0:
-                plot.describe_df('dev_set', self.df_test, self.target, f'test_distplot')
-            plot.describe_df('train_set', self.df_train, self.target, f'train_distplot')
+            self.util.error(f'unkown sample selection specifier {sample_selection}, should be [all | train | test]')
+        plot.describe_df(f'{sample_selection}_set', df_labels, self.target, f'{sample_selection}_distplot')
     def extract_test_feats(self):
         self.feats_test = pd.DataFrame()
@@ -303,7 +306,7 @@ class Experiment:
     #     self.df_train = self.df_train.append(df_train_aug)
-    def analyse_features(self):
+    def analyse_features(self, needs_feats):
         """
         Do a feature exploration
@@ -311,7 +314,8 @@ class Experiment:
         if self.util.config_val('EXPL', 'value_counts', False):
             self.plot_distribution()
+        if not needs_feats:
+            return
         sample_selection = self.util.config_val('EXPL', 'sample_selection', 'False')
         if sample_selection=='all':
             df_feats = pd.concat([self.feats_train, self.feats_test])
@@ -325,7 +329,7 @@ class Experiment:
         elif sample_selection=='False':
             pass
         else:
-            self.util.error(f'unkown feature_distribution specifier {sample_selection}, should be [all | train | test]')
+            self.util.error(f'unkown sample selection specifier {sample_selection}, should be [all | train | test]')
         if sample_selection in ('all', 'train', 'test'):
             feat_analyser = FeatureAnalyser(sample_selection, df_labels, df_feats)
             feat_analyser.analyse()
@@ -336,8 +340,8 @@ class Experiment:
             scatters = ast.literal_eval(glob_conf.config['EXPL']['scatter'])
             if self.util.exp_is_classification():
                 plots = Plots()
-                all_feats =self.feats_train.append(self.feats_test)
-                all_labels = self.df_train['class_label'].append(self.df_test['class_label'])
+                all_feats = pd.concat([self.feats_train, self.feats_test])
+                all_labels = pd.concat([self.df_train['class_label'], self.df_test['class_label']])
                 for scatter in scatters:
                     plots.scatter_plot(all_feats, all_labels, scatter)
             else:

{nkululeko-0.45.0 → nkululeko-0.45.2}/nkululeko/explore.py RENAMED Viewed

@@ -42,12 +42,13 @@ def main(src_dir):
     scatter = eval(util.config_val('EXPL', 'scatter', 'False'))
     model_type = util.config_val('EXPL', 'model', False)
     plot_tree = eval(util.config_val('EXPL', 'plot_tree', 'False'))
+    needs_feats = False
     if plot_feats or tsne or scatter or model_type or plot_tree:
         # these investigations need features to explore
         expr.extract_feats()
+        needs_feats = True
     # explore
-    expr.analyse_features()
+    expr.analyse_features(needs_feats)
     print('DONE')

{nkululeko-0.45.0 → nkululeko-0.45.2}/nkululeko/util.py RENAMED Viewed

@@ -226,4 +226,9 @@ class Util:
         elif format == 'csv':
             return audformat.utils.read_csv(name)
         else:
-            self.error(f'unkown store format: {format}')
+            self.error(f'unkown store format: {format}')
+    def copy_flags(self, df_source, df_target):
+        df_target.is_labeled = df_source.is_labeled
+        df_target.got_gender = df_source.got_gender
+        df_target.got_speaker = df_source.got_speaker

{nkululeko-0.45.0 → nkululeko-0.45.2/nkululeko.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.45.0
+Version: 0.45.2
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -123,7 +123,6 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
 * [Combine feature sets](http://blog.syntheticspeech.de/2022/06/30/how-to-combine-feature-sets-with-nkululeko/)
 * [Classifying continuous variables](http://blog.syntheticspeech.de/2022/01/26/nkululeko-classifying-continuous-variables/)
 * [Try out / demo a trained model](http://blog.syntheticspeech.de/2022/01/24/nkululeko-try-out-demo-a-trained-model/)
-* [Plot distributions of feature values](http://blog.syntheticspeech.de/2023/02/16/nkululeko-how-to-plot-distributions-of-feature-values/)
 * [Perform cross database experiments](http://blog.syntheticspeech.de/2021/10/05/nkululeko-perform-cross-database-experiments/)
 * [Meta parameter optimization](http://blog.syntheticspeech.de/2021/09/03/perform-optimization-with-nkululeko/)
 * [How to set up wav2vec embedding](http://blog.syntheticspeech.de/2021/12/03/how-to-set-up-wav2vec-embedding-for-nkululeko/)
@@ -217,6 +216,13 @@ Version 0.44.1
 * bugfixing: feature importance: https://github.com/felixbur/nkululeko/issues/23
 * bugfixing: loading csv database with filewise index https://github.com/felixbur/nkululeko/issues/24
+Version 0.45.2
+--------------
+* added sample_selection for sample distribution plots
+Version 0.45.1
+--------------
+* fixed dataframe.append bug
 Version 0.45.0
 --------------

{nkululeko-0.45.0 → nkululeko-0.45.2}/setup.cfg RENAMED Viewed

@@ -1,6 +1,6 @@
 [metadata]
 name = nkululeko
-version = 0.45.0
+version = 0.45.2
 author = Felix Burkhardt
 author_email = fxburk@gmail.com
 description = Machine learning audio prediction experiments based on templates