PyPI - nkululeko - Versions diffs - 0.94.3__py3-none-any.whl → 0.95.1__py3-none-any.whl - Mend

nkululeko 0.94.3__py3-none-any.whl → 0.95.1__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (42)

nkululeko/augmenting/resampler.py +5 -2
nkululeko/autopredict/ap_emotion.py +36 -0
nkululeko/autopredict/ap_text.py +45 -0
nkululeko/autopredict/tests/__init__.py +0 -0
nkululeko/autopredict/tests/test_whisper_transcriber.py +122 -0
nkululeko/autopredict/whisper_transcriber.py +81 -0
nkululeko/balance.py +222 -0
nkululeko/constants.py +1 -1
nkululeko/experiment.py +53 -3
nkululeko/explore.py +32 -13
nkululeko/feat_extract/feats_analyser.py +45 -17
nkululeko/feat_extract/feats_emotion2vec.py +51 -26
nkululeko/feat_extract/feats_praat.py +3 -3
nkululeko/feat_extract/feats_praat_core.py +769 -0
nkululeko/feat_extract/tests/__init__.py +1 -0
nkululeko/feat_extract/tests/test_feats_opensmile.py +162 -0
nkululeko/feat_extract/tests/test_feats_praat_core.py +507 -0
nkululeko/glob_conf.py +9 -0
nkululeko/modelrunner.py +15 -39
nkululeko/models/model.py +4 -42
nkululeko/models/model_tuned.py +416 -84
nkululeko/models/model_xgb.py +148 -2
nkululeko/models/tests/test_model_knn.py +49 -0
nkululeko/models/tests/test_model_mlp.py +153 -0
nkululeko/models/tests/test_model_xgb.py +33 -0
nkululeko/nkululeko.py +0 -9
nkululeko/plots.py +25 -19
nkululeko/predict.py +8 -6
nkululeko/reporting/report.py +7 -5
nkululeko/reporting/reporter.py +20 -5
nkululeko/test_predictor.py +7 -1
nkululeko/tests/__init__.py +1 -0
nkululeko/tests/test_balancing.py +270 -0
nkululeko/utils/util.py +38 -6
{nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/METADATA +1 -1
{nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/RECORD +40 -27
nkululeko/feat_extract/feats_opensmile copy.py +0 -93
nkululeko/feat_extract/feinberg_praat.py +0 -628
{nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/WHEEL +0 -0
{nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/entry_points.txt +0 -0
{nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/licenses/LICENSE +0 -0
{nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/top_level.txt +0 -0

nkululeko/augmenting/resampler.py CHANGED Viewed

@@ -68,7 +68,9 @@ class Resampler:
                 self.df.index.set_levels(new_files, level="file")
             )
             if not self.not_testing:
-                target_file = self.util.config_val("RESAMPLE", "target", "resampled.csv")
+                target_file = self.util.config_val(
+                    "RESAMPLE", "target", "resampled.csv"
+                )
                 # remove encoded labels
                 target = self.util.config_val("DATA", "target", "emotion")
                 if "class_label" in self.df.columns:
@@ -77,7 +79,8 @@ class Resampler:
                 # save file
                 self.df.to_csv(target_file)
                 self.util.debug(
-                    "saved resampled list of files to" f" {os.path.abspath(target_file)}"
+                    "saved resampled list of files to"
+                    f" {os.path.abspath(target_file)}"
                 )
             else:
                 # When running from command line, save to simple resampled.csv

nkululeko/autopredict/ap_emotion.py ADDED Viewed

@@ -0,0 +1,36 @@
+"""
+A predictor for emotion classification.
+Uses emotion2vec models for emotion prediction.
+"""
+import ast
+import nkululeko.glob_conf as glob_conf
+from nkululeko.feature_extractor import FeatureExtractor
+from nkululeko.utils.util import Util
+class EmotionPredictor:
+    """
+    EmotionPredictor
+    predicting emotion with emotion2vec models
+    """
+    def __init__(self, df):
+        self.df = df
+        self.util = Util("emotionPredictor")
+    def predict(self, split_selection):
+        self.util.debug(f"predicting emotion for {split_selection} samples")
+        feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
+        self.feature_extractor = FeatureExtractor(
+            self.df, ["emotion2vec-large"], feats_name, split_selection
+        )
+        emotion_df = self.feature_extractor.extract()
+        pred_emotion = ["neutral"] * len(emotion_df)
+        return_df = self.df.copy()
+        return_df["emotion_pred"] = pred_emotion
+        return return_df

nkululeko/autopredict/ap_text.py ADDED Viewed

@@ -0,0 +1,45 @@
+"""A predictor for text.
+Currently based on whisper model.
+"""
+import ast
+import torch
+from nkululeko.feature_extractor import FeatureExtractor
+import nkululeko.glob_conf as glob_conf
+from nkululeko.utils.util import Util
+class TextPredictor:
+    """TextPredictor.
+    predicting text with the whisper model
+    """
+    def __init__(self, df, util=None):
+        self.df = df
+        if util is not None:
+            self.util = util
+        else:
+            # create a new util instance
+            # this is needed to access the config and other utilities
+            # in the autopredict module
+            self.util = Util("textPredictor")
+        from nkululeko.autopredict.whisper_transcriber import Transcriber
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        device = self.util.config_val("MODEL", "device", device)
+        self.transcriber = Transcriber(
+            device=device,
+            language=self.util.config_val("EXP", "language", "en"),
+            util=self.util,
+        )
+    def predict(self, split_selection):
+        self.util.debug(f"predicting text for {split_selection} samples")
+        df = self.transcriber.transcribe_index(
+            self.df.index
+        )
+        return_df = self.df.copy()
+        return_df["text"] = df["text"].values
+        return return_df

nkululeko/autopredict/tests/__init__.py ADDED Viewed

File without changes

nkululeko/autopredict/tests/test_whisper_transcriber.py ADDED Viewed

@@ -0,0 +1,122 @@
+import os
+import tempfile
+from datetime import timedelta
+from unittest.mock import MagicMock, Mock, patch
+import numpy as np
+import pandas as pd
+import pytest
+from nkululeko.autopredict.whisper_transcriber import Transcriber
+class TestTranscriber:
+    @patch('nkululeko.autopredict.whisper_transcriber.whisper.load_model')
+    @patch('nkululeko.autopredict.whisper_transcriber.torch.cuda.is_available')
+    def test_init_default_device(self, mock_cuda, mock_load_model):
+        mock_cuda.return_value = True
+        mock_model = Mock()
+        mock_load_model.return_value = mock_model
+        transcriber = Transcriber()
+        mock_load_model.assert_called_once_with("turbo", device="cuda")
+        assert transcriber.language == "en"
+        assert transcriber.model == mock_model
+    @patch('nkululeko.autopredict.whisper_transcriber.whisper.load_model')
+    def test_init_custom_params(self, mock_load_model):
+        mock_model = Mock()
+        mock_load_model.return_value = mock_model
+        mock_util = Mock()
+        transcriber = Transcriber(model_name="base", device="cpu", language="es", util=mock_util)
+        mock_load_model.assert_called_once_with("base", device="cpu")
+        assert transcriber.language == "es"
+        assert transcriber.util == mock_util
+    def test_transcribe_file(self):
+        mock_model = Mock()
+        mock_model.transcribe.return_value = {"text": "  Hello world  "}
+        transcriber = Transcriber()
+        transcriber.model = mock_model
+        result = transcriber.transcribe_file("test.wav")
+        mock_model.transcribe.assert_called_once_with("test.wav", language="en", without_timestamps=True)
+        assert result == "Hello world"
+    @patch('nkululeko.autopredict.whisper_transcriber.audiofile.write')
+    def test_transcribe_array(self, mock_write):
+        transcriber = Transcriber()
+        transcriber.transcribe_file = Mock(return_value="transcribed text")
+        signal = np.array([0.1, 0.2, 0.3])
+        sampling_rate = 16000
+        result = transcriber.transcribe_array(signal, sampling_rate)
+        mock_write.assert_called_once_with("temp.wav", signal, sampling_rate, format="wav")
+        transcriber.transcribe_file.assert_called_once_with("temp.wav")
+        assert result == "transcribed text"
+    @patch('nkululeko.autopredict.whisper_transcriber.audiofile.read')
+    @patch('nkululeko.autopredict.whisper_transcriber.audeer.mkdir')
+    @patch('nkululeko.autopredict.whisper_transcriber.audeer.path')
+    @patch('nkululeko.autopredict.whisper_transcriber.audeer.basename_wo_ext')
+    @patch('nkululeko.autopredict.whisper_transcriber.os.path.isfile')
+    def test_transcribe_index_with_cache(self, mock_isfile, mock_basename, mock_path, mock_mkdir, mock_read):
+        mock_util = Mock()
+        mock_util.get_path.return_value = "/cache"
+        mock_util.read_json.return_value = {"transcription": "cached text"}
+        mock_mkdir.return_value = "/cache/transcriptions"
+        mock_path.side_effect = lambda *args: "/".join(args)
+        mock_basename.return_value = "file1"
+        mock_isfile.return_value = True
+        transcriber = Transcriber(util=mock_util)
+        index = pd.Index([
+            ("file1.wav", timedelta(seconds=0), timedelta(seconds=1))
+        ])
+        result = transcriber.transcribe_index(index)
+        assert isinstance(result, pd.DataFrame)
+        assert len(result) == 1
+        assert result.iloc[0]["text"] == "cached text"
+    @patch('nkululeko.autopredict.whisper_transcriber.whisper.load_model')
+    @patch('nkululeko.autopredict.whisper_transcriber.audiofile.read')
+    @patch('nkululeko.autopredict.whisper_transcriber.audeer.mkdir')
+    @patch('nkululeko.autopredict.whisper_transcriber.audeer.path')
+    @patch('nkululeko.autopredict.whisper_transcriber.audeer.basename_wo_ext')
+    @patch('nkululeko.autopredict.whisper_transcriber.os.path.isfile')
+    def test_transcribe_index_without_cache(self, mock_isfile, mock_basename, mock_path, mock_mkdir, mock_audioread, mock_load_model):
+        mock_util = Mock()
+        mock_util.get_path.return_value = "/cache"
+        mock_mkdir.return_value = "/cache/transcriptions"
+        mock_path.side_effect = lambda *args: "/".join(args)
+        mock_basename.return_value = "file1"
+        mock_isfile.return_value = False
+        mock_audioread.return_value = (np.array([0.1, 0.2]), 16000)
+        mock_load_model.return_value = Mock()
+        transcriber = Transcriber(util=mock_util)
+        transcriber.transcribe_array = Mock(return_value="new transcription")
+        index = pd.Index([
+            ("file1.wav", timedelta(seconds=0), timedelta(seconds=1))
+        ])
+        result = transcriber.transcribe_index(index)
+        mock_util.save_json.assert_called_once()
+        assert isinstance(result, pd.DataFrame)
+        assert len(result) == 1
+        assert result.iloc[0]["text"] == "new transcription"

nkululeko/autopredict/whisper_transcriber.py ADDED Viewed

@@ -0,0 +1,81 @@
+import os
+import pandas as pd
+import torch
+from tqdm import tqdm
+import whisper
+import audeer
+import audiofile
+from nkululeko.utils.util import Util
+class Transcriber:
+    def __init__(self, model_name="turbo", device=None, language="en", util=None):
+        if device is None:
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.model = whisper.load_model(model_name, device=device)
+        self.language = language
+        self.util = util
+    def transcribe_file(self, audio_path):
+        """Transcribe the audio file at the given path.
+        :param audio_path: Path to the audio file to transcribe.
+        :return: Transcription text.
+        """
+        result = self.model.transcribe(
+            audio_path, language=self.language, without_timestamps=True)
+        result = result["text"].strip()
+        return result
+    def transcribe_array(self, signal, sampling_rate):
+        """Transcribe the audio file at the given path.
+        :param audio_path: Path to the audio file to transcribe.
+        :return: Transcription text.
+        """
+        tmporary_path = "temp.wav"
+        audiofile.write(
+            "temp.wav", signal, sampling_rate, format="wav")
+        result = self.transcribe_file(tmporary_path)
+        return result
+    def transcribe_index(self, index:pd.Index) ->  pd.DataFrame:
+        """Transcribe the audio files in the given index.
+        :param index: Index containing tuples of (file, start, end).
+        :return: DataFrame with transcriptions indexed by the original index.
+        :rtype: pd.DataFrame
+        """
+        file_name = ""
+        seg_index = 0
+        transcriptions = []
+        transcriber_cache = audeer.mkdir(
+            audeer.path(self.util.get_path("cache"), "transcriptions"))
+        for idx, (file, start, end) in enumerate(
+            tqdm(index.to_list())
+        ):
+            if file != file_name:
+                file_name = file
+                seg_index = 0
+            cache_name = audeer.basename_wo_ext(file)+str(seg_index)
+            cache_path = audeer.path(transcriber_cache, cache_name + ".json")
+            if os.path.isfile(cache_path):
+                transcription = self.util.read_json(cache_path)["transcription"]
+            else:
+                dur = end.total_seconds() - start.total_seconds()
+                y, sr = audiofile.read(file, offset=start, duration=dur)
+                transcription = self.transcribe_array(
+                    y, sr)
+                self.util.save_json(cache_path,
+                                {"transcription": transcription,
+                                 "file": file,
+                                 "start": start.total_seconds(),
+                                 "end": end.total_seconds()})
+            transcriptions.append(transcription)
+            seg_index += 1
+        df = pd.DataFrame({"text":transcriptions}, index=index)
+        return df

nkululeko/balance.py ADDED Viewed

@@ -0,0 +1,222 @@
+# balance.py
+"""
+Data and feature balancing module for imbalanced datasets.
+This module provides a unified interface for various balancing techniques
+including over-sampling, under-sampling, and combination methods.
+"""
+import pandas as pd
+import numpy as np
+from nkululeko.utils.util import Util
+import nkululeko.glob_conf as glob_conf
+class DataBalancer:
+    """Class to handle data and feature balancing operations."""
+    def __init__(self, random_state=42):
+        """
+        Initialize the DataBalancer.
+        Args:
+            random_state (int): Random state for reproducible results
+        """
+        self.util = Util("data_balancer")
+        self.random_state = random_state
+        # Supported balancing algorithms
+        self.oversampling_methods = [
+            'ros',           # RandomOverSampler
+            'smote',         # SMOTE
+            'adasyn',        # ADASYN
+            'borderlinesmote',  # BorderlineSMOTE
+            'svmsmote'       # SVMSMOTE
+        ]
+        self.undersampling_methods = [
+            'clustercentroids',   # ClusterCentroids
+            'randomundersampler', # RandomUnderSampler
+            'editednearestneighbours',  # EditedNearestNeighbours
+            'tomeklinks'          # TomekLinks
+        ]
+        self.combination_methods = [
+            'smoteenn',      # SMOTEENN
+            'smotetomek'     # SMOTETomek
+        ]
+    def get_supported_methods(self):
+        """Get all supported balancing methods."""
+        return {
+            'oversampling': self.oversampling_methods,
+            'undersampling': self.undersampling_methods,
+            'combination': self.combination_methods
+        }
+    def is_valid_method(self, method):
+        """Check if a balancing method is supported."""
+        all_methods = (self.oversampling_methods +
+                      self.undersampling_methods +
+                      self.combination_methods)
+        return method.lower() in all_methods
+    def balance_features(self, df_train, feats_train, target_column, method):
+        """
+        Balance features using the specified method.
+        Args:
+            df_train (pd.DataFrame): Training dataframe with target labels
+            feats_train (np.ndarray or pd.DataFrame): Training features
+            target_column (str): Name of the target column
+            method (str): Balancing method to use
+        Returns:
+            tuple: (balanced_df, balanced_features)
+        """
+        if not self.is_valid_method(method):
+            available_methods = (self.oversampling_methods +
+                               self.undersampling_methods +
+                               self.combination_methods)
+            self.util.error(
+                f"Unknown balancing algorithm: {method}. "
+                f"Available methods: {available_methods}"
+            )
+            return df_train, feats_train
+        orig_size = len(df_train)
+        self.util.debug(f"Balancing features with: {method}")
+        self.util.debug(f"Original dataset size: {orig_size}")
+        # Get original class distribution
+        orig_dist = df_train[target_column].value_counts().to_dict()
+        self.util.debug(f"Original class distribution: {orig_dist}")
+        try:
+            # Apply the specified balancing method
+            X_res, y_res = self._apply_balancing_method(
+                feats_train, df_train[target_column], method
+            )
+            # Create new balanced dataframe
+            balanced_df = pd.DataFrame({target_column: y_res})
+            # If original dataframe has an index, try to preserve it
+            if hasattr(X_res, 'index'):
+                balanced_df.index = X_res.index
+            new_size = len(balanced_df)
+            new_dist = balanced_df[target_column].value_counts().to_dict()
+            self.util.debug(f"Balanced dataset size: {new_size} (was {orig_size})")
+            self.util.debug(f"New class distribution: {new_dist}")
+            # Log class distribution with label names if encoder is available
+            self._log_class_distribution(y_res, method)
+            return balanced_df, X_res
+        except Exception as e:
+            self.util.debug(f"Error applying {method} balancing: {str(e)}")
+            # Don't call sys.exit() in tests, just return original data
+            return df_train, feats_train
+    def _apply_balancing_method(self, features, targets, method):
+        """Apply the specific balancing method."""
+        method = method.lower()
+        # Over-sampling methods
+        if method == 'ros':
+            from imblearn.over_sampling import RandomOverSampler
+            sampler = RandomOverSampler(random_state=self.random_state)
+        elif method == 'smote':
+            from imblearn.over_sampling import SMOTE
+            sampler = SMOTE(random_state=self.random_state)
+        elif method == 'adasyn':
+            from imblearn.over_sampling import ADASYN
+            sampler = ADASYN(random_state=self.random_state)
+        elif method == 'borderlinesmote':
+            from imblearn.over_sampling import BorderlineSMOTE
+            sampler = BorderlineSMOTE(random_state=self.random_state)
+        elif method == 'svmsmote':
+            from imblearn.over_sampling import SVMSMOTE
+            sampler = SVMSMOTE(random_state=self.random_state)
+        # Under-sampling methods
+        elif method == 'clustercentroids':
+            from imblearn.under_sampling import ClusterCentroids
+            sampler = ClusterCentroids(random_state=self.random_state)
+        elif method == 'randomundersampler':
+            from imblearn.under_sampling import RandomUnderSampler
+            sampler = RandomUnderSampler(random_state=self.random_state)
+        elif method == 'editednearestneighbours':
+            from imblearn.under_sampling import EditedNearestNeighbours
+            sampler = EditedNearestNeighbours()
+        elif method == 'tomeklinks':
+            from imblearn.under_sampling import TomekLinks
+            sampler = TomekLinks()
+        # Combination methods
+        elif method == 'smoteenn':
+            from imblearn.combine import SMOTEENN
+            sampler = SMOTEENN(random_state=self.random_state)
+        elif method == 'smotetomek':
+            from imblearn.combine import SMOTETomek
+            sampler = SMOTETomek(random_state=self.random_state)
+        else:
+            raise ValueError(f"Unsupported balancing method: {method}")
+        # Apply the balancing
+        X_res, y_res = sampler.fit_resample(features, targets)
+        return X_res, y_res
+    def _log_class_distribution(self, y_res, method):
+        """Log class distribution with label names if possible."""
+        # Check if label encoder is available for pretty printing
+        if (hasattr(glob_conf, "label_encoder") and
+            glob_conf.label_encoder is not None):
+            try:
+                le = glob_conf.label_encoder
+                res = pd.Series(y_res).value_counts()
+                resd = {}
+                for i, label_idx in enumerate(res.index.values):
+                    label_name = le.inverse_transform([label_idx])[0]
+                    resd[label_name] = res.values[i]
+                self.util.debug(f"Class distribution after {method} balancing: {resd}")
+            except Exception as e:
+                self.util.debug(
+                    f"Could not decode class labels: {e}. "
+                    f"Showing numeric distribution: {pd.Series(y_res).value_counts().to_dict()}"
+                )
+        else:
+            self.util.debug(
+                f"Label encoder not available. "
+                f"Class distribution after {method} balancing: {pd.Series(y_res).value_counts().to_dict()}"
+            )
+class LegacyDataBalancer:
+    """Legacy data balancer for backward compatibility."""
+    def __init__(self):
+        self.util = Util("legacy_data_balancer")
+    def balance_data(self, df_train, df_test):
+        """
+        Legacy method for data balancing (kept for backward compatibility).
+        This method should be replaced by the new DataBalancer class.
+        """
+        self.util.debug("Using legacy data balancing method")
+        # Implementation for legacy balance_data method would go here
+        # For now, just return the original data unchanged
+        return df_train, df_test

nkululeko/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.94.3"
+VERSION="0.95.1"
 SAMPLING_RATE = 16000

nkululeko/experiment.py CHANGED Viewed

@@ -513,7 +513,7 @@ class Experiment:
     def autopredict(self):
         """Predict labels for samples with existing models and add to the dataframe."""
-        sample_selection = self.util.config_val("PREDICT", "split", "all")
+        sample_selection = self.util.config_val("PREDICT", "sample_selection", "all")
         if sample_selection == "all":
             df = pd.concat([self.df_train, self.df_test])
         elif sample_selection == "train":
@@ -569,6 +569,11 @@ class Experiment:
                 predictor = STOIPredictor(df)
                 df = predictor.predict(sample_selection)
+            elif target == "text":
+                from nkululeko.autopredict.ap_text import TextPredictor
+                predictor = TextPredictor(df, self.util)
+                df = predictor.predict(sample_selection)
             elif target == "arousal":
                 from nkululeko.autopredict.ap_arousal import ArousalPredictor
@@ -584,6 +589,11 @@ class Experiment:
                 predictor = DominancePredictor(df)
                 df = predictor.predict(sample_selection)
+            elif target == "emotion":
+                from nkululeko.autopredict.ap_emotion import EmotionPredictor
+                predictor = EmotionPredictor(df)
+                df = predictor.predict(sample_selection)
             else:
                 self.util.error(f"unknown auto predict target: {target}")
         return df
@@ -668,11 +678,27 @@ class Experiment:
         # check if a scatterplot should be done
         scatter_var = eval(self.util.config_val("EXPL", "scatter", "False"))
+        # Priority: use [EXPL][scatter.target] if available, otherwise use [DATA][target] value
+        if hasattr(self, "target") and self.target != "none":
+            default_scatter_target = f"['{self.target}']"
+        else:
+            default_scatter_target = "['class_label']"
         scatter_target = self.util.config_val(
-            "EXPL", "scatter.target", "['class_label']"
+            "EXPL", "scatter.target", default_scatter_target
         )
+        if scatter_target == default_scatter_target:
+            self.util.debug(
+                f"scatter.target using default from [DATA][target]: {scatter_target}"
+            )
+        else:
+            self.util.debug(
+                f"scatter.target from [EXPL][scatter.target]: {scatter_target}"
+            )
         if scatter_var:
-            scatters = ast.literal_eval(glob_conf.config["EXPL"]["scatter"])
+            scatters = ast.literal_eval(scatter_target)
             scat_targets = ast.literal_eval(scatter_target)
             plots = Plots()
             for scat_target in scat_targets:
@@ -692,6 +718,30 @@ class Experiment:
                             df_feats, df_labels, f"{scat_target}_bins", scatter
                         )
+        # check if t-SNE plot should be generated
+        tsne = eval(self.util.config_val("EXPL", "tsne", "False"))
+        if tsne:
+            target_column = self.util.config_val("DATA", "target", "emotion")
+            plots = Plots()
+            self.util.debug("generating t-SNE plot...")
+            plots.scatter_plot(df_feats, df_labels, target_column, "tsne")
+        # check if UMAP plot should be generated
+        umap_plot = eval(self.util.config_val("EXPL", "umap", "False"))
+        if umap_plot:
+            target_column = self.util.config_val("DATA", "target", "emotion")
+            plots = Plots()
+            self.util.debug("generating UMAP plot...")
+            plots.scatter_plot(df_feats, df_labels, target_column, "umap")
+        # check if PCA plot should be generated
+        pca_plot = eval(self.util.config_val("EXPL", "pca", "False"))
+        if pca_plot:
+            target_column = self.util.config_val("DATA", "target", "emotion")
+            plots = Plots()
+            self.util.debug("generating PCA plot...")
+            plots.scatter_plot(df_feats, df_labels, target_column, "pca")
     def _check_scale(self):
         self.util.save_to_store(self.feats_train, "feats_train")
         self.util.save_to_store(self.feats_test, "feats_test")

nkululeko 0.94.3__py3-none-any.whl → 0.95.1__py3-none-any.whl

nkululeko 0.94.3__py3-none-any.whl → 0.95.1__py3-none-any.whl