nkululeko 0.94.3__py3-none-any.whl → 0.95.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. nkululeko/augmenting/resampler.py +5 -2
  2. nkululeko/autopredict/ap_emotion.py +36 -0
  3. nkululeko/autopredict/ap_text.py +45 -0
  4. nkululeko/autopredict/tests/__init__.py +0 -0
  5. nkululeko/autopredict/tests/test_whisper_transcriber.py +122 -0
  6. nkululeko/autopredict/whisper_transcriber.py +81 -0
  7. nkululeko/balance.py +222 -0
  8. nkululeko/constants.py +1 -1
  9. nkululeko/experiment.py +53 -3
  10. nkululeko/explore.py +32 -13
  11. nkululeko/feat_extract/feats_analyser.py +45 -17
  12. nkululeko/feat_extract/feats_emotion2vec.py +51 -26
  13. nkululeko/feat_extract/feats_praat.py +3 -3
  14. nkululeko/feat_extract/feats_praat_core.py +769 -0
  15. nkululeko/feat_extract/tests/__init__.py +1 -0
  16. nkululeko/feat_extract/tests/test_feats_opensmile.py +162 -0
  17. nkululeko/feat_extract/tests/test_feats_praat_core.py +507 -0
  18. nkululeko/glob_conf.py +9 -0
  19. nkululeko/modelrunner.py +15 -39
  20. nkululeko/models/model.py +4 -42
  21. nkululeko/models/model_tuned.py +416 -84
  22. nkululeko/models/model_xgb.py +148 -2
  23. nkululeko/models/tests/test_model_knn.py +49 -0
  24. nkululeko/models/tests/test_model_mlp.py +153 -0
  25. nkululeko/models/tests/test_model_xgb.py +33 -0
  26. nkululeko/nkululeko.py +0 -9
  27. nkululeko/plots.py +25 -19
  28. nkululeko/predict.py +8 -6
  29. nkululeko/reporting/report.py +7 -5
  30. nkululeko/reporting/reporter.py +20 -5
  31. nkululeko/test_predictor.py +7 -1
  32. nkululeko/tests/__init__.py +1 -0
  33. nkululeko/tests/test_balancing.py +270 -0
  34. nkululeko/utils/util.py +38 -6
  35. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/METADATA +1 -1
  36. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/RECORD +40 -27
  37. nkululeko/feat_extract/feats_opensmile copy.py +0 -93
  38. nkululeko/feat_extract/feinberg_praat.py +0 -628
  39. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/WHEEL +0 -0
  40. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/entry_points.txt +0 -0
  41. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/licenses/LICENSE +0 -0
  42. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/top_level.txt +0 -0
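The headline addition in this release is nkululeko/balance.py, which collects the train-set balancing logic behind a single DataBalancer class; the new test suite shown below exercises all eleven of its methods. For orientation, a minimal usage sketch inferred solely from the call signatures used in that test suite (the synthetic data, the config values and the printed output are illustrative, not part of the package):

import numpy as np
import pandas as pd

import nkululeko.glob_conf as glob_conf
from nkululeko.balance import DataBalancer

# minimal config, mirroring the mock used in the test suite below
glob_conf.config = {
    "FEATS": {"balancing": "smote"},
    "DATA": {"target": "target"},
    "MODEL": {"type": "mlp"},
}

# toy imbalanced data: 80 majority vs. 20 minority samples, 6 features each
feats_train = np.vstack([np.random.randn(80, 6), np.random.randn(20, 6) + 2])
df_train = pd.DataFrame({"target": [0] * 80 + [1] * 20})

balancer = DataBalancer(random_state=42)
print(balancer.get_supported_methods())  # oversampling / undersampling / combination

if balancer.is_valid_method("smote"):
    balanced_df, balanced_feats = balancer.balance_features(
        df_train=df_train,
        feats_train=feats_train,
        target_column="target",
        method="smote",
    )
    print(len(balanced_df), balanced_feats.shape)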
nkululeko/tests/test_balancing.py ADDED
@@ -0,0 +1,270 @@
+ #!/usr/bin/env python3
+ """
+ Simple and comprehensive test suite for all balancing methods in DataBalancer.
+
+ Tests all 11 balancing methods from balance.py:
+
+ Oversampling (5): ros, smote, adasyn, borderlinesmote, svmsmote
+ Undersampling (4): clustercentroids, randomundersampler, editednearestneighbours, tomeklinks
+ Combination (2): smoteenn, smotetomek
+
+ Run with: pytest nkululeko/tests/test_balancing.py -v
+ """
+
+ import numpy as np
+ import pandas as pd
+ import pytest
+ from nkululeko.balance import DataBalancer
+ import nkululeko.glob_conf as glob_conf
+
+
+ @pytest.fixture
+ def sample_data():
+     """Create sample imbalanced data that works with all methods"""
+     np.random.seed(42)
+
+     # Majority class: 100 samples, Minority class: 25 samples
+     # Well-separated for better algorithm performance
+     majority_features = np.random.randn(100, 10)
+     minority_features = np.random.randn(25, 10) + 3  # Good separation
+
+     features = np.vstack([majority_features, minority_features])
+     labels = np.array([0] * 100 + [1] * 25)
+
+     df_train = pd.DataFrame({'target': labels})
+     feats_train = features
+
+     return df_train, feats_train
+
+
+ @pytest.fixture
+ def mock_config():
+     """Mock configuration for testing"""
+     original_config = getattr(glob_conf, 'config', None)
+
+     glob_conf.config = {
+         'FEATS': {'balancing': 'smote'},
+         'DATA': {'target': 'target'},
+         'MODEL': {'type': 'mlp'}
+     }
+
+     yield glob_conf.config
+
+     if original_config is not None:
+         glob_conf.config = original_config
+
+
+ class TestDataBalancer:
+     """Simple test suite for DataBalancer - tests all 11 methods"""
+
+     def test_initialization(self):
+         """Test 1: DataBalancer can be initialized"""
+         balancer = DataBalancer(random_state=42)
+         assert balancer is not None
+         assert balancer.random_state == 42
+
+     def test_get_all_supported_methods(self):
+         """Test 2: All 11 methods are reported as supported"""
+         balancer = DataBalancer()
+         methods = balancer.get_supported_methods()
+
+         # Check we have all 3 categories
+         assert 'oversampling' in methods
+         assert 'undersampling' in methods
+         assert 'combination' in methods
+
+         # Check exact counts
+         assert len(methods['oversampling']) == 5
+         assert len(methods['undersampling']) == 4
+         assert len(methods['combination']) == 2
+
+         # Total should be 11
+         total = (len(methods['oversampling']) +
+                  len(methods['undersampling']) +
+                  len(methods['combination']))
+         assert total == 11
+
+     def test_method_validation(self):
+         """Test 3: Method validation works correctly"""
+         balancer = DataBalancer()
+
+         # Valid methods
+         assert balancer.is_valid_method('ros') == True
+         assert balancer.is_valid_method('smote') == True
+         assert balancer.is_valid_method('clustercentroids') == True
+         assert balancer.is_valid_method('smoteenn') == True
+
+         # Invalid methods
+         assert balancer.is_valid_method('invalid') == False
+         assert balancer.is_valid_method('') == False
+
+     def test_all_oversampling_methods(self, sample_data, mock_config):
+         """Test 4: All 5 oversampling methods work"""
+         df_train, feats_train = sample_data
+         balancer = DataBalancer(random_state=42)
+
+         oversampling_methods = ['ros', 'smote', 'adasyn', 'borderlinesmote', 'svmsmote']
+
+         for method in oversampling_methods:
+             print(f"Testing oversampling: {method}")
+
+             balanced_df, balanced_features = balancer.balance_features(
+                 df_train=df_train,
+                 feats_train=feats_train,
+                 target_column='target',
+                 method=method
+             )
+
+             # Basic checks
+             assert len(balanced_df) >= len(df_train), f"{method} should increase/maintain size"
+             assert len(balanced_df) == len(balanced_features), f"{method} length mismatch"
+             assert balanced_features.shape[1] == feats_train.shape[1], f"{method} feature dim changed"
+
+             print(f"✓ {method} passed")
+
+     def test_all_undersampling_methods(self, sample_data, mock_config):
+         """Test 5: All 4 undersampling methods work"""
+         df_train, feats_train = sample_data
+         balancer = DataBalancer(random_state=42)
+
+         undersampling_methods = ['clustercentroids', 'randomundersampler',
+                                  'editednearestneighbours', 'tomeklinks']
+
+         for method in undersampling_methods:
+             print(f"Testing undersampling: {method}")
+
+             balanced_df, balanced_features = balancer.balance_features(
+                 df_train=df_train,
+                 feats_train=feats_train,
+                 target_column='target',
+                 method=method
+             )
+
+             # Basic checks
+             assert len(balanced_df) <= len(df_train), f"{method} should decrease/maintain size"
+             assert len(balanced_df) == len(balanced_features), f"{method} length mismatch"
+             assert balanced_features.shape[1] == feats_train.shape[1], f"{method} feature dim changed"
+
+             print(f"✓ {method} passed")
+
+     def test_all_combination_methods(self, sample_data, mock_config):
+         """Test 6: All 2 combination methods work"""
+         df_train, feats_train = sample_data
+         balancer = DataBalancer(random_state=42)
+
+         combination_methods = ['smoteenn', 'smotetomek']
+
+         for method in combination_methods:
+             print(f"Testing combination: {method}")
+
+             balanced_df, balanced_features = balancer.balance_features(
+                 df_train=df_train,
+                 feats_train=feats_train,
+                 target_column='target',
+                 method=method
+             )
+
+             # Basic checks
+             assert len(balanced_df) == len(balanced_features), f"{method} length mismatch"
+             assert balanced_features.shape[1] == feats_train.shape[1], f"{method} feature dim changed"
+             assert len(balanced_df) > 0, f"{method} resulted in empty dataset"
+
+             print(f"✓ {method} passed")
+
+     def test_all_11_methods_comprehensive(self, sample_data, mock_config):
+         """Test 7: All 11 methods work in one comprehensive test"""
+         df_train, feats_train = sample_data
+         balancer = DataBalancer(random_state=42)
+
+         # Get all methods from the balancer itself
+         all_methods = balancer.get_supported_methods()
+
+         successful_methods = []
+         failed_methods = []
+
+         print("Testing all 11 balancing methods...")
+
+         for category, methods in all_methods.items():
+             for method in methods:
+                 try:
+                     balanced_df, balanced_features = balancer.balance_features(
+                         df_train=df_train,
+                         feats_train=feats_train,
+                         target_column='target',
+                         method=method
+                     )
+
+                     # Verify results
+                     assert len(balanced_df) == len(balanced_features)
+                     assert balanced_features.shape[1] == feats_train.shape[1]
+                     assert len(balanced_df) > 0
+
+                     successful_methods.append(method)
+                     print(f"✓ {method} succeeded")
+
+                 except Exception as e:
+                     failed_methods.append((method, str(e)))
+                     print(f"✗ {method} failed: {str(e)}")
+
+         print(f"\nResults: {len(successful_methods)}/11 methods successful")
+         print(f"Successful: {successful_methods}")
+         if failed_methods:
+             print(f"Failed: {[m[0] for m in failed_methods]}")
+
+         # All 11 methods should work
+         assert len(successful_methods) == 11, f"Expected 11 successful methods, got {len(successful_methods)}"
+         assert len(failed_methods) == 0, f"Some methods failed: {failed_methods}"
+
+     def test_invalid_method_handling(self, sample_data, mock_config):
+         """Test 8: Invalid methods are handled correctly"""
+         df_train, feats_train = sample_data
+         balancer = DataBalancer(random_state=42)
+
+         # Test that invalid methods are detected by validation
+         assert balancer.is_valid_method('invalid_method') == False
+         assert balancer.is_valid_method('nonexistent') == False
+         assert balancer.is_valid_method('') == False
+
+         # Note: The actual balance_features() with invalid method calls sys.exit()
+         # This is expected behavior in the current implementation
+         print("✓ Invalid method validation works correctly")
+
+
+ def test_simple_integration():
+     """Test 9: Simple integration test without fixtures"""
+     print("Simple integration test...")
+
+     # Create simple data
+     np.random.seed(42)
+     features = np.random.randn(60, 5)
+     labels = np.array([0] * 40 + [1] * 20)  # 40 vs 20 imbalance
+
+     df_train = pd.DataFrame({'target': labels})
+
+     # Test a few key methods
+     balancer = DataBalancer(random_state=42)
+     key_methods = ['ros', 'smote', 'clustercentroids', 'randomundersampler']
+
+     for method in key_methods:
+         balanced_df, balanced_features = balancer.balance_features(
+             df_train=df_train,
+             feats_train=features,
+             target_column='target',
+             method=method
+         )
+
+         assert len(balanced_df) == len(balanced_features)
+         print(f"✓ {method} integration test passed")
+
+     print("✓ Integration test completed")
+
+
+ if __name__ == "__main__":
+     print("Running simple balancing tests...")
+     print("=" * 50)
+
+     # Run integration test
+     test_simple_integration()
+
+     print("=" * 50)
+     print("Direct test completed! Run 'pytest test_balancing.py -v' for full tests")
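The method keys exercised above mirror class names from the imbalanced-learn package, which balance.py presumably wraps; the wrapper itself is not shown in this diff. A plausible mapping, stated as an assumption for orientation only:

from imblearn.combine import SMOTEENN, SMOTETomek
from imblearn.over_sampling import (
    ADASYN,
    SMOTE,
    SVMSMOTE,
    BorderlineSMOTE,
    RandomOverSampler,
)
from imblearn.under_sampling import (
    ClusterCentroids,
    EditedNearestNeighbours,
    RandomUnderSampler,
    TomekLinks,
)

# assumed mapping of DataBalancer method keys to imbalanced-learn classes
METHOD_MAP = {
    # oversampling (5)
    "ros": RandomOverSampler,
    "smote": SMOTE,
    "adasyn": ADASYN,
    "borderlinesmote": BorderlineSMOTE,
    "svmsmote": SVMSMOTE,
    # undersampling (4)
    "clustercentroids": ClusterCentroids,
    "randomundersampler": RandomUnderSampler,
    "editednearestneighbours": EditedNearestNeighbours,
    "tomeklinks": TomekLinks,
    # combination (2)
    "smoteenn": SMOTEENN,
    "smotetomek": SMOTETomek,
}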
nkululeko/utils/util.py CHANGED
@@ -1,16 +1,18 @@
  # util.py
  import ast
  import configparser
+ import json
  import logging
  import os.path
  import pickle
  import sys

- import audeer
- import audformat
  import numpy as np
  import pandas as pd

+ import audeer
+ import audformat
+

  class Util:
      # a list of words that need not to be warned upon if default values are
@@ -92,6 +94,8 @@ class Util:
                  dir_name = "./results/"
              elif entry == "model_dir":
                  dir_name = "./models/"
+             elif entry == "cache":
+                 dir_name = "./cache/"
              else:
                  dir_name = "./store/"
          else:
@@ -102,13 +106,15 @@ class Util:
          except KeyError:
              # some default values
              if entry == "fig_dir":
-                 entryn = "./images/"
+                 entryn = "images/"
              elif entry == "res_dir":
-                 entryn = "./results/"
+                 entryn = "results/"
              elif entry == "model_dir":
-                 entryn = "./models/"
+                 entryn = "models/"
+             elif entry == "cache":
+                 entryn = "cache/"
              else:
-                 entryn = "./store/"
+                 entryn = "store/"

          # Expand image, model and result directories with run index
          if entry == "fig_dir" or entry == "res_dir" or entry == "model_dir":
@@ -328,6 +334,7 @@ class Util:
              self.logger.warning(f"WARNING: {self.caller}: {message}")
          else:
              print(f"WARNING: {message}")
+
      def debug(self, message):
          if self.logger is not None:
              self.logger.debug(f"DEBUG: {self.caller}: {message}")
@@ -505,3 +512,28 @@ class Util:
      def to_3_digits_str(self, x):
          """Given a float, return this to 3 digits as string without integer number."""
          return str(self.to_3_digits(x))[1:]
+
+     def save_json(self, file: str, var: dict):
+         """Save variable to json file.
+
+         Args:
+             file: path to json file
+             var: dictionary to store
+
+         """
+         with open(file, "w", encoding="utf-8") as fp:
+             json.dump(var, fp, ensure_ascii=False, indent=2)
+
+     def read_json(self, file: str) -> object:
+         """Read variable from json file.
+
+         Args:
+             file: path to json file
+
+         Returns:
+             content of json file
+
+         """
+         with open(file, "r") as fp:
+             return json.load(fp)
+
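Besides the import reshuffle, util.py gains a "cache" entry in get_path and two small JSON helpers. A hedged usage sketch, assuming an already constructed Util instance named util (how Util is instantiated is unchanged by this diff); the file path and values are illustrative:

# illustrative values only
results = {"run": 0, "uar": 0.712, "acc": 0.745}

util.save_json("results/best_results.json", results)    # written with indent=2, UTF-8
restored = util.read_json("results/best_results.json")
assert restored == results

cache_dir = util.get_path("cache")  # new entry, defaults to a cache/ directory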
{nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nkululeko
- Version: 0.94.3
+ Version: 0.95.1
  Summary: Machine learning audio prediction experiments based on templates
  Home-page: https://github.com/felixbur/nkululeko
  Author: Felix Burkhardt
{nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/RECORD CHANGED
@@ -2,44 +2,46 @@ examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
  nkululeko/aug_train.py,sha256=wpiHCJ7zsW38kumg3ypwXZe2HQrhUblAnv7P2QeJnAc,3525
  nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
+ nkululeko/balance.py,sha256=r7opXbrqAipm2euPPaOmLlA5J10p2bHQgO5kWk2x9ro,8702
  nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
- nkululeko/constants.py,sha256=KCqkmtwj--gcAdaRwj_Zb44_ewVNp06Hfp8-YGDG8iI,39
+ nkululeko/constants.py,sha256=9E1ltDzIxGnwuxdRBW6OUWwJB8Im9_c4dnOUwjcDDr8,39
  nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
  nkululeko/demo.py,sha256=tu7Al2l5MCLVegkDC-NE2wcuc_YE7NRbgOlPW3yhGEs,4940
  nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
  nkululeko/demo_predictor.py,sha256=lDF-xOxRdEAclOmbepAYg-BQXQdGkHfq2n74PTIoop8,4872
  nkululeko/ensemble.py,sha256=71V-rre61H3J4sh7lu-OTo4I2_g7mm_rQxwW1ARDHgY,12782
- nkululeko/experiment.py,sha256=xZQ3SpFhH4QByRzVBCO4Ps84KDXKuVPZ_qUzLUPgN5g,36221
- nkululeko/explore.py,sha256=FPM2CS-LKgcDV-LnjYlD6pEv7HuCQpH_C3KyyiOCdk4,3589
+ nkululeko/experiment.py,sha256=hdFvRA7EoQz10nId9MwcbYOTz2ifYeGrFKVJOv9a88Q,38394
+ nkululeko/explore.py,sha256=aDVHwuo-lkih7VZrbb_zFKg5fowSrAIcx0V9wf0SRGo,4175
  nkululeko/export.py,sha256=U-V4acxtuL6qKt6oAsVcM5TTeWogYUJ3GU-lA6rq6d4,4336
  nkululeko/feature_extractor.py,sha256=X6ZWDjGwUMVwnP6TkCEnw8B4xo8eWvJa1QT9-0WUuvA,4102
  nkululeko/file_checker.py,sha256=xJY0Q6w47pnmgJVK5rcAKPYBrCpV7eBT4_3YBzTx-H8,3454
  nkululeko/filter_data.py,sha256=4sGrKvMZ_hLnJPrHm_CqjDPKIRV8REWoT7nfSYGXbwo,7305
  nkululeko/fixedsegment.py,sha256=Tb92QiuiyMsOO3WRWwuGjZGibS8hbHHCrcWAXGk7g04,2868
- nkululeko/glob_conf.py,sha256=KL9YJQTHvTztxo1vr25qRRgaPnx4NTg0XrdbovKGMmw,525
- nkululeko/modelrunner.py,sha256=NpDgXfKkn8dOrQzhUiEfGI56Qrb1sOtWTD31II4Zgbk,11550
+ nkululeko/glob_conf.py,sha256=NLFh-1_I0Wdfo2EnSq1Oppx23AX6jAUpgFbk2zqZJ24,659
+ nkululeko/modelrunner.py,sha256=OFN18uG84iJyjNVWjcvDpqbcBrmylziXCakUTNE2-ZQ,10530
  nkululeko/multidb.py,sha256=sO6OwJn8sn1-C-ig3thsIL8QMWHdV9SnJhDodKjeKrI,6876
  nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
- nkululeko/nkululeko.py,sha256=FaLimlbx47rJgWgDEd0ZROAiXy2cOypliVdqJn-Bvws,2257
- nkululeko/plots.py,sha256=i9VIkviBWLgncfnyK44TUMzg2Xa0_UhfL0LnMF1vHTw,27022
- nkululeko/predict.py,sha256=MLnHEyFmSiHLLs-HDczag8Vu3zKF5T1rXLKdZZJ6py8,2083
+ nkululeko/nkululeko.py,sha256=6ALPMMIz6l0O3IRaP0q4b59ZUxpfzNqLQUqZMf5t3Zo,1976
+ nkululeko/plots.py,sha256=lUxgyoriYTwdpHZvBBQ4e41v77deQrt0PcRDLJWijys,27503
+ nkululeko/predict.py,sha256=PWv1Pc39lrxqqIWrYszVk5SL37dDL93CHgcruItNID8,2211
  nkululeko/resample.py,sha256=rn3-M1A-iwVGibfQNGyeYNa7briD24lIN9Szq_1uTJo,5194
  nkululeko/runmanager.py,sha256=YtGQP0UyyQTKkilncB1XYM-T8oatzGcZEOcj5SorjJw,8902
  nkululeko/scaler.py,sha256=a4lKwWT436TV4VEvqtP1uQ58Yz67XVHr1HjO5gp3xLI,5109
  nkululeko/segment.py,sha256=7UrJEwdLmh9wDL5iBwpdJyJm9dwSxidHrHt-_D2qtxw,4949
  nkululeko/syllable_nuclei.py,sha256=5w_naKxNxz66a_qLkraemi2fggM-gWesiiBPS47iFcE,9931
  nkululeko/test.py,sha256=1w624vo5KTzmFC8BUStGlLDmIEAFuJUz7J0W-gp7AxI,1677
- nkululeko/test_predictor.py,sha256=RPQxorj1uygLeSnAuNims5CFccXwrDwTnrIDs-gDlNQ,2859
+ nkululeko/test_predictor.py,sha256=i8vSaB8OOrdELoDttQVMs2Bc-fUOi2C5ANqnt32K3Zk,3064
  nkululeko/test_pretrain.py,sha256=6FZeETlWzg9Cq_sn3BFKhfH91jW26nAIDm1bJkInNNA,8463
  nkululeko/augmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/augmenting/augmenter.py,sha256=TUUznEz0pe9DSMC9r7LoBckuvsJTprvypeV5-8zLn20,2846
  nkululeko/augmenting/randomsplicer.py,sha256=TQTy4RBt6XbWiuUu5Ic913DMvmwTUwEufldBJjo7i1s,2801
  nkululeko/augmenting/randomsplicing.py,sha256=GXCpCDdOsOyWACDJ3ujmFZBVe6ISvkoQLefBNPgxxow,1750
- nkululeko/augmenting/resampler.py,sha256=j2yuB9h9UwGQHqwF8CZPSGqAfOiyQV3979WQjU2toVM,3962
+ nkululeko/augmenting/resampler.py,sha256=c5AjohxomX8ujOoJRnLZoNe1fxY8Fdw6LAdFm9KDy78,4020
  nkululeko/autopredict/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/autopredict/ap_age.py,sha256=yzd8sF6gi0hnqNawyLBCIkt-pKgl9gYPlZHsrLGfz0U,1098
  nkululeko/autopredict/ap_arousal.py,sha256=lpv3jTSVEVCcR226JevNM6S7e0_uMZXHb_8Wpup1yj8,1027
  nkululeko/autopredict/ap_dominance.py,sha256=Ltq5x0ralxU1758_e-nNKvzexiPUM66xLAm3Wo2B07c,1040
+ nkululeko/autopredict/ap_emotion.py,sha256=1efW3cQjwc804Pf2aYU-XfjYtYXtZdyeiXtWL439x6o,1030
  nkululeko/autopredict/ap_gender.py,sha256=RjLv9YxY9OPHT_gnd6htjKQzQA4DSKcbjipKGjHHx2A,1011
  nkululeko/autopredict/ap_mos.py,sha256=PMLU67JDgYQMobRSR2vW9cWoL3QK5JbhLM65fVsRGkc,1108
  nkululeko/autopredict/ap_pesq.py,sha256=EuJ9u6oaSPWdYsaU8q3t8tiFKhfW1qdqgO-cySpfxw0,1141
@@ -47,28 +49,32 @@ nkululeko/autopredict/ap_sdr.py,sha256=xYCy4M_aWzoFiYD_KOK9ys2P2v0bfxNkLcIRdi5z2
  nkululeko/autopredict/ap_sid.py,sha256=b_JwVWlqcwdC7acU9Q7mExuOJKUn6qdlmQTm8pmmptk,2642
  nkululeko/autopredict/ap_snr.py,sha256=cjc0pUsCN_RacTw1UBR2cGY9t_um48f2cjo3QJDn7bw,1111
  nkululeko/autopredict/ap_stoi.py,sha256=csv9qCcRmieHAhypszqGoGt9r3biM8IYPgcTwp9GIFM,1188
+ nkululeko/autopredict/ap_text.py,sha256=zaz9qIg90-ghZhBe1ka0HoUnap6s6RyopUKoCpttHOU,1333
  nkululeko/autopredict/ap_valence.py,sha256=9S06SpO_zXKSpkf0InHYYXZcD9HDGoCJ6UPkn__eBAg,1027
  nkululeko/autopredict/estimate_snr.py,sha256=1k9-XadABudnsNOeFZD_Fg0E64-GUQVS7JEp82MLQS4,4995
+ nkululeko/autopredict/whisper_transcriber.py,sha256=DWDvpRaV5KmUF18ojPEvxnVXm_h_nWyY-TfW2Ngd5N8,2941
+ nkululeko/autopredict/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ nkululeko/autopredict/tests/test_whisper_transcriber.py,sha256=ilas6j3OUvq_xnQCRZgytQCtyrpNU6tvG5a8kPvVKBQ,5085
  nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/data/dataset.py,sha256=JLbBYGniUrjwxs-HtbIyhqO3Cv-ELfpmlq7jzij4dBc,41759
  nkululeko/data/dataset_csv.py,sha256=AIbtB6pGk5BSQGIgfokZ7tEGFjmuOq5w2XumRSimVWs,4833
  nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/feat_extract/feats_agender.py,sha256=onfAQ6-xx_mFMJXEF1IX8cHBmGtGeX6weJmxbkfh1_o,3184
  nkululeko/feat_extract/feats_agender_agender.py,sha256=_YQv1qw--3uQfnyTQDCwlmPRnrhdMhgXbYK2yQtseW0,3464
- nkululeko/feat_extract/feats_analyser.py,sha256=txuIEgO4uprle35RzBczvZm5Hc7iUl2p9oBEfdrvg_I,13506
+ nkululeko/feat_extract/feats_analyser.py,sha256=lodim7qQ8M7c3iMeJ5bHQ-nCy9Cehx1Xl5K3leii6-w,14768
  nkululeko/feat_extract/feats_ast.py,sha256=w62xEoLiFtU-rj6SXkqXAktmoFaXcAcAWpUyEjp8JWo,4652
  nkululeko/feat_extract/feats_auddim.py,sha256=CGLp_aYhudfwoU5522vjrvjPxfZcyw593A8xLjYefV8,3134
  nkululeko/feat_extract/feats_audmodel.py,sha256=OsZyB1rdcG0Fai2gAwBlbuubmWor1_-P4IDkZLqgPKE,3161
  nkululeko/feat_extract/feats_clap.py,sha256=1tttpfm2SJmQgYm2u8eUVpDiDOpWdKqFChpY3ZZokNs,3395
- nkululeko/feat_extract/feats_emotion2vec.py,sha256=ObVlqbsJsw-hWGsUOXY68Ebynt5Bn4Xtlu_Gvq3XJI4,8728
+ nkululeko/feat_extract/feats_emotion2vec.py,sha256=LnV8xEg7L7HIDqz0ulqUNoaAHBU0d5gyQPb2_32T_18,9694
  nkululeko/feat_extract/feats_hubert.py,sha256=F3vrPCkx8EimJjFWYCZ7Yg9uo1G3NjYt4UKrGIUev8k,5172
  nkululeko/feat_extract/feats_import.py,sha256=cPi4XRuRs71npB8YGXr7rYOvkeTU_oZEl3GrGncdiqY,2222
  nkululeko/feat_extract/feats_mld.py,sha256=5aRoYiGDm5ApoFntxAMQYPjEelXHHRBHZcAJR9dxaeI,1945
  nkululeko/feat_extract/feats_mos.py,sha256=vkH1FdXtduoU0-yjBtVccC2b_p_eyH8laRnwlL7QTVM,4136
- nkululeko/feat_extract/feats_opensmile copy.py,sha256=BLj5sUaBPz7vLPfNlt9LdQurSypmViqgSpPK-6aXGhQ,4029
  nkululeko/feat_extract/feats_opensmile.py,sha256=HwbGs0EaPxZ7DznQZFem8RYgyQWz02oya77uVY7KhZE,9203
  nkululeko/feat_extract/feats_oxbow.py,sha256=TRoEJx5EKZiqoPoPRibHc0vkBMoZcKlGoGNq4NbyHZw,4895
- nkululeko/feat_extract/feats_praat.py,sha256=jZ-XXbP3iy25QQIzA4Hrv0HxsYvJNPavoCW2FyJNKMg,3064
+ nkululeko/feat_extract/feats_praat.py,sha256=3j1xySKqW74USjk8DweWAajHeTcuszKCFY1htQhe1cY,3070
+ nkululeko/feat_extract/feats_praat_core.py,sha256=Q0OVuo5h38a860yflzRtUpy0J0w7WCg0aBLrDhIskFc,28524
  nkululeko/feat_extract/feats_snr.py,sha256=Zxwo78HLleNsziYLOj34RQUnp9I7r1yMXqjYipDOjZw,2761
  nkululeko/feat_extract/feats_spectra.py,sha256=6WhFUpB0WTutg7OFMlAw9lSwVU5OBYCDcPRxaiH-Qn8,3621
  nkululeko/feat_extract/feats_spkrec.py,sha256=o_6bdU4lIkj64S5Kdjf1iyuo1VASeYxE4XdxV94a8gE,4732
@@ -78,13 +84,15 @@ nkululeko/feat_extract/feats_wav2vec2.py,sha256=q1QzMD3KbhF2SOmxdwI7CiViRmhlFRyg
  nkululeko/feat_extract/feats_wavlm.py,sha256=O9cfc39VF5aPJRRATKb37pHT4W11i2cu5O1mY9LOjIA,4755
  nkululeko/feat_extract/feats_whisper.py,sha256=n3ESZtva7wshs8E8diBlQYa9xCH_P0UY1DncSrxz-FY,4508
  nkululeko/feat_extract/featureset.py,sha256=clcBv9rzBRW-bfw7JC_FYTjU5uUS-c0UE1XtQLYYRiE,1615
- nkululeko/feat_extract/feinberg_praat.py,sha256=bgzWtQkKbgcygrzwAxDXosui1rcc38qhWuJq9GLr0z8,21308
  nkululeko/feat_extract/transformer_feature_extractor.py,sha256=LaXuW-AJZ931ttLis0J5h9N3RtiiE51BnkxJR-bubfY,5837
+ nkululeko/feat_extract/tests/__init__.py,sha256=pzjkYs1PNo7107jIXKa_xwdBR2SKxzkg53a9W3bvbpw,32
+ nkululeko/feat_extract/tests/test_feats_opensmile.py,sha256=eYjGBsH6UkuRleKzGZHNv2cXRZz2xPCw0dkTfXw5S9s,5761
+ nkululeko/feat_extract/tests/test_feats_praat_core.py,sha256=ntbpIrehr4D-lOvaE0hNCe-og5sN4syBGBUTuNGZpDo,20916
  nkululeko/losses/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/losses/loss_ccc.py,sha256=NOK0y0fxKUnU161B5geap6Fmn8QzoPl2MqtPiV8IuJE,976
  nkululeko/losses/loss_softf1loss.py,sha256=5gW-PuiqeAZcRgfwjueIOQtMokOjZWgQnVIv59HKTCo,1309
  nkululeko/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nkululeko/models/model.py,sha256=0O6H-kME1yVHU-EKu5iOZVBB7fFNg3lfagvGgMrldxM,14426
+ nkululeko/models/model.py,sha256=2STBD3jtLKeNSk7arCFJdaV6FL-nuLR1qpsjvZ4W-9A,12975
  nkululeko/models/model_bayes.py,sha256=tQUXEsXoS6WnfapQjP78S_gxNBssTOqE78A2iG8SfLU,407
  nkululeko/models/model_cnn.py,sha256=TKj43865epsiK7a0COyfBDaFHKOYgWgnPpMVCPWUhCM,10497
  nkululeko/models/model_gmm.py,sha256=mhHFNtTzHuJvqYSA0h5YhvjA--KhnN6MTU_S0G3-d1c,1332
@@ -97,30 +105,35 @@ nkululeko/models/model_svm.py,sha256=zP8ykLhCZTYvwSqw06XHuzq9qMBtsiYpxjUpWDAnMyA
  nkululeko/models/model_svr.py,sha256=FEwYRdgqwgGhZdkpRnT7Ef12lklWi6GZL28PyV99xWs,726
  nkululeko/models/model_tree.py,sha256=6L3PD3aIiiQz1RPWS6z3Edx4f0gnR7AOfBKOJzf0BNU,433
  nkululeko/models/model_tree_reg.py,sha256=IMaQpNImoRqP8Biw1CsJevxpV_PVpKblsKtYlMW5d_U,429
- nkululeko/models/model_tuned.py,sha256=VuRyNqw3XTpQ2eHsWOJN8X-V98AN8Wqiq7UgwT5BQRU,23763
- nkululeko/models/model_xgb.py,sha256=zfZM3lqH5uttVB18b1MRIhP9CCeCuIh1ycgOuFMcqUM,449
+ nkululeko/models/model_tuned.py,sha256=74c_pQUtpx_x8bM3r5ufuqhaaQxfy6KRUqirdzSac-Q,35999
+ nkululeko/models/model_xgb.py,sha256=_VxFFP1QcoyxrwvJSrzdIwwDt85IulUWvg1BxXBgN1Y,6616
  nkululeko/models/model_xgr.py,sha256=H01FJCRgmX2unvambMs5TTCS9sI6VDB9ip9G6rVGt2c,419
  nkululeko/models/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ nkululeko/models/tests/test_model_knn.py,sha256=hFCJ0C0taQO-fwA7j8HcFrwCSluSb6Vg4NCQQ_zL4bc,1793
+ nkululeko/models/tests/test_model_mlp.py,sha256=XVvniKAtroxLRKyYGW-ew1mHuRo3_cWk4nGnXQ5aDEk,4977
  nkululeko/models/tests/test_model_svm.py,sha256=spDlZmeBKBdK4EFBpOgEkaAfGeGH9kau6CqSWOY6Uag,1856
+ nkululeko/models/tests/test_model_xgb.py,sha256=-Rz5YTeqUJ4Kwdh5ny31c3zxsUJXTypR4L3ItoOU7yU,1036
  nkululeko/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/reporting/defines.py,sha256=0vh-Tlx4fAPpk1o6mP_4x3EkIoqzYMr38IZnj-JM5z4,641
  nkululeko/reporting/latex_writer.py,sha256=NGwSIfd4nfslDkNUOSZSdqY_VDLA8634thyhe-vj1bY,1824
- nkululeko/reporting/report.py,sha256=bYN8B66gg3IWHAyfd6uIVjpYKy3rOI6aEwgfXU0LSAY,1006
+ nkululeko/reporting/report.py,sha256=B5eoIKMz46VKDBsi7M9u_iegzAD-E3eGCmolzSFjZ3c,1118
  nkululeko/reporting/report_item.py,sha256=drkknsyFhGviaPJNmPQtCXJmRhTSSfjNcJt0Bls6JAA,533
- nkululeko/reporting/reporter.py,sha256=-VyV0TZ0vBAx6UZNegnKS3i3WpkF27ntBRlYvp9NNiQ,20174
+ nkululeko/reporting/reporter.py,sha256=e-piNtnv0QUWKs9Ha_d4CzgqJxPBG9XBm3Ru8y0ot-U,20896
  nkululeko/reporting/result.py,sha256=G63a2tHCwHhM6NBJgYzsWKWJm4Yu3r4hsCHA2Km7eHU,1073
  nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=b3t0zdpJYofKWMyKRMtMMX91xeR-k8d5pbnNaQHcsOE,1902
  nkululeko/segmenting/seg_pyannote.py,sha256=6IPbgjnGOz9juzEKDTZN3PSipX4t6Mz-DILAx3rp5do,4216
  nkululeko/segmenting/seg_silero.py,sha256=ulodnvtRq5MLHDxy_RmAK4tJg6h1d-mPq-uCPFkGVKg,4258
+ nkululeko/tests/__init__.py,sha256=XzD6C-ZuewsccUwx7KzEUtUxJrRx2d7sPFViscjf1O0,30
+ nkululeko/tests/test_balancing.py,sha256=21110R77iTcSWKiSTxYDkJ26lxPFTlZf_ZwVjeiSh4w,10164
  nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/utils/files.py,sha256=SrrYaU7AB80MZHiV1jcB0h_zigvYLYgSVNTXV4ao38g,4593
  nkululeko/utils/stats.py,sha256=3Fyx8q8BSKYmiufT6OkRug9RATWmGrr9BaX_y8jziWo,3074
  nkululeko/utils/unzip.py,sha256=G68f5120TjwACZC3bQcneMniddnwubPbBdMc2L5KBOo,1206
- nkululeko/utils/util.py,sha256=6NDKhOx0fV5fKyhSoY4hem96p7OuPcmhCDQR9EzkQhw,17829
- nkululeko-0.94.3.dist-info/licenses/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
- nkululeko-0.94.3.dist-info/METADATA,sha256=QeZ9ZMTqwgdDvwRTCvgFO7X55_J84AWZh7jVf9uV-6M,2874
- nkululeko-0.94.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- nkululeko-0.94.3.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
- nkululeko-0.94.3.dist-info/top_level.txt,sha256=bf1k1YKkqcXemNX_cUgoyKqQ3_GVErPqAY-53J36jkM,19
- nkululeko-0.94.3.dist-info/RECORD,,
+ nkululeko/utils/util.py,sha256=o62TZRcxO1VflINai6ojEzSmcbXIFInNLGogSbqJgiA,18561
+ nkululeko-0.95.1.dist-info/licenses/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+ nkululeko-0.95.1.dist-info/METADATA,sha256=KhJ1JPenNsZGUIhdeYGvNKrM1H-ioqONAh06LpxdnMQ,2874
+ nkululeko-0.95.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ nkululeko-0.95.1.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
+ nkululeko-0.95.1.dist-info/top_level.txt,sha256=bf1k1YKkqcXemNX_cUgoyKqQ3_GVErPqAY-53J36jkM,19
+ nkululeko-0.95.1.dist-info/RECORD,,
nkululeko/feat_extract/feats_opensmile copy.py DELETED
@@ -1,93 +0,0 @@
- # opensmileset.py
- import os
-
- import opensmile
- import pandas as pd
-
- import nkululeko.glob_conf as glob_conf
- from nkululeko.feat_extract.featureset import Featureset
-
-
- class Opensmileset(Featureset):
-     def __init__(self, name, data_df, feats_type=None, config_file=None):
-         super().__init__(name, data_df, feats_type)
-         self.featset = self.util.config_val("FEATS", "set", "eGeMAPSv02")
-         try:
-             self.feature_set = eval(f"opensmile.FeatureSet.{self.featset}")
-             # 'eGeMAPSv02, ComParE_2016, GeMAPSv01a, eGeMAPSv01a':
-         except AttributeError:
-             self.util.error(f"something is wrong with feature set: {self.featset}")
-         self.featlevel = self.util.config_val("FEATS", "level", "functionals")
-         try:
-             self.featlevel = self.featlevel.replace("lld", "LowLevelDescriptors")
-             self.featlevel = self.featlevel.replace("functionals", "Functionals")
-             self.feature_level = eval(f"opensmile.FeatureLevel.{self.featlevel}")
-         except AttributeError:
-             self.util.error(f"something is wrong with feature level: {self.featlevel}")
-
-     def extract(self):
-         """Extract the features based on the initialized dataset or re-open them when found on disk."""
-         store = self.util.get_path("store")
-         store_format = self.util.config_val("FEATS", "store_format", "pkl")
-         storage = f"{store}{self.name}.{store_format}"
-         extract = eval(
-             self.util.config_val("FEATS", "needs_feature_extraction", "False")
-         )
-         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
-         if extract or not os.path.isfile(storage) or no_reuse:
-             self.util.debug("extracting openSmile features, this might take a while...")
-             smile = opensmile.Smile(
-                 feature_set=self.feature_set,
-                 feature_level=self.feature_level,
-                 num_workers=self.n_jobs,
-                 verbose=True,
-             )
-             if isinstance(self.data_df.index, pd.MultiIndex):
-                 self.df = smile.process_index(self.data_df.index)
-                 self.df = self.df.set_index(self.data_df.index)
-             else:
-                 self.df = smile.process_files(self.data_df.index)
-                 self.df.index = self.df.index.droplevel(1)
-                 self.df.index = self.df.index.droplevel(1)
-             self.util.write_store(self.df, storage, store_format)
-             try:
-                 glob_conf.config["DATA"]["needs_feature_extraction"] = "False"
-             except KeyError:
-                 pass
-         else:
-             self.util.debug(f"reusing extracted OS features: {storage}.")
-             self.df = self.util.get_store(storage, store_format)
-
-     def extract_sample(self, signal, sr):
-         smile = opensmile.Smile(
-             feature_set=self.feature_set,
-             feature_level=opensmile.FeatureLevel.Functionals,
-         )
-         feats = smile.process_signal(signal, sr)
-         return feats.to_numpy()
-
-     # def filter(self):
-     #     # use only the features that are indexed in the target dataframes
-     #     self.df = self.df[self.df.index.isin(self.data_df.index)]
-     #     try:
-     #         # use only some features
-     #         selected_features = ast.literal_eval(
-     #             glob_conf.config["FEATS"]["os.features"]
-     #         )
-     #         self.util.debug(f"selecting features from opensmile: {selected_features}")
-     #         sel_feats_df = pd.DataFrame()
-     #         hit = False
-     #         for feat in selected_features:
-     #             try:
-     #                 sel_feats_df[feat] = self.df[feat]
-     #                 hit = True
-     #             except KeyError:
-     #                 pass
-     #         if hit:
-     #             self.df = sel_feats_df
-     #             self.util.debug(
-     #                 "new feats shape after selecting opensmile features:"
-     #                 f" {self.df.shape}"
-     #             )
-     #     except KeyError:
-     #         pass