nkululeko 0.94.3__py3-none-any.whl → 0.95.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. nkululeko/augmenting/resampler.py +5 -2
  2. nkululeko/autopredict/ap_emotion.py +36 -0
  3. nkululeko/autopredict/ap_text.py +45 -0
  4. nkululeko/autopredict/tests/__init__.py +0 -0
  5. nkululeko/autopredict/tests/test_whisper_transcriber.py +122 -0
  6. nkululeko/autopredict/whisper_transcriber.py +81 -0
  7. nkululeko/balance.py +222 -0
  8. nkululeko/constants.py +1 -1
  9. nkululeko/experiment.py +53 -3
  10. nkululeko/explore.py +32 -13
  11. nkululeko/feat_extract/feats_analyser.py +45 -17
  12. nkululeko/feat_extract/feats_emotion2vec.py +51 -26
  13. nkululeko/feat_extract/feats_praat.py +3 -3
  14. nkululeko/feat_extract/feats_praat_core.py +769 -0
  15. nkululeko/feat_extract/tests/__init__.py +1 -0
  16. nkululeko/feat_extract/tests/test_feats_opensmile.py +162 -0
  17. nkululeko/feat_extract/tests/test_feats_praat_core.py +507 -0
  18. nkululeko/glob_conf.py +9 -0
  19. nkululeko/modelrunner.py +15 -39
  20. nkululeko/models/model.py +4 -42
  21. nkululeko/models/model_tuned.py +416 -84
  22. nkululeko/models/model_xgb.py +148 -2
  23. nkululeko/models/tests/test_model_knn.py +49 -0
  24. nkululeko/models/tests/test_model_mlp.py +153 -0
  25. nkululeko/models/tests/test_model_xgb.py +33 -0
  26. nkululeko/nkululeko.py +0 -9
  27. nkululeko/plots.py +25 -19
  28. nkululeko/predict.py +8 -6
  29. nkululeko/reporting/report.py +7 -5
  30. nkululeko/reporting/reporter.py +20 -5
  31. nkululeko/test_predictor.py +7 -1
  32. nkululeko/tests/__init__.py +1 -0
  33. nkululeko/tests/test_balancing.py +270 -0
  34. nkululeko/utils/util.py +38 -6
  35. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/METADATA +1 -1
  36. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/RECORD +40 -27
  37. nkululeko/feat_extract/feats_opensmile copy.py +0 -93
  38. nkululeko/feat_extract/feinberg_praat.py +0 -628
  39. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/WHEEL +0 -0
  40. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/entry_points.txt +0 -0
  41. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/licenses/LICENSE +0 -0
  42. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/top_level.txt +0 -0
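The headline addition in this release is nkululeko/balance.py, which collects the train-set balancing logic behind a single DataBalancer class; the new test suite shown below exercises all eleven of its methods. For orientation, a minimal usage sketch inferred solely from the call signatures used in that test suite (the synthetic data, the config values and the printed output are illustrative, not part of the package):

import numpy as np
import pandas as pd

import nkululeko.glob_conf as glob_conf
from nkululeko.balance import DataBalancer

# minimal config, mirroring the mock used in the test suite below
glob_conf.config = {
    "FEATS": {"balancing": "smote"},
    "DATA": {"target": "target"},
    "MODEL": {"type": "mlp"},
}

# toy imbalanced data: 80 majority vs. 20 minority samples, 6 features each
feats_train = np.vstack([np.random.randn(80, 6), np.random.randn(20, 6) + 2])
df_train = pd.DataFrame({"target": [0] * 80 + [1] * 20})

balancer = DataBalancer(random_state=42)
print(balancer.get_supported_methods())  # oversampling / undersampling / combination

if balancer.is_valid_method("smote"):
    balanced_df, balanced_feats = balancer.balance_features(
        df_train=df_train,
        feats_train=feats_train,
        target_column="target",
        method="smote",
    )
    print(len(balanced_df), balanced_feats.shape)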
nkululeko/tests/test_balancing.py ADDED
@@ -0,0 +1,270 @@
+ #!/usr/bin/env python3
+ """
+ Simple and comprehensive test suite for all balancing methods in DataBalancer.
+
+ Tests all 11 balancing methods from balance.py:
+
+ Oversampling (5): ros, smote, adasyn, borderlinesmote, svmsmote
+ Undersampling (4): clustercentroids, randomundersampler, editednearestneighbours, tomeklinks
+ Combination (2): smoteenn, smotetomek
+
+ Run with: pytest nkululeko/tests/test_balancing.py -v
+ """
+
+ import numpy as np
+ import pandas as pd
+ import pytest
+ from nkululeko.balance import DataBalancer
+ import nkululeko.glob_conf as glob_conf
+
+
+ @pytest.fixture
+ def sample_data():
+     """Create sample imbalanced data that works with all methods"""
+     np.random.seed(42)
+
+     # Majority class: 100 samples, Minority class: 25 samples
+     # Well-separated for better algorithm performance
+     majority_features = np.random.randn(100, 10)
+     minority_features = np.random.randn(25, 10) + 3  # Good separation
+
+     features = np.vstack([majority_features, minority_features])
+     labels = np.array([0] * 100 + [1] * 25)
+
+     df_train = pd.DataFrame({'target': labels})
+     feats_train = features
+
+     return df_train, feats_train
+
+
+ @pytest.fixture
+ def mock_config():
+     """Mock configuration for testing"""
+     original_config = getattr(glob_conf, 'config', None)
+
+     glob_conf.config = {
+         'FEATS': {'balancing': 'smote'},
+         'DATA': {'target': 'target'},
+         'MODEL': {'type': 'mlp'}
+     }
+
+     yield glob_conf.config
+
+     if original_config is not None:
+         glob_conf.config = original_config
+
+
+ class TestDataBalancer:
+     """Simple test suite for DataBalancer - tests all 11 methods"""
+
+     def test_initialization(self):
+         """Test 1: DataBalancer can be initialized"""
+         balancer = DataBalancer(random_state=42)
+         assert balancer is not None
+         assert balancer.random_state == 42
+
+     def test_get_all_supported_methods(self):
+         """Test 2: All 11 methods are reported as supported"""
+         balancer = DataBalancer()
+         methods = balancer.get_supported_methods()
+
+         # Check we have all 3 categories
+         assert 'oversampling' in methods
+         assert 'undersampling' in methods
+         assert 'combination' in methods
+
+         # Check exact counts
+         assert len(methods['oversampling']) == 5
+         assert len(methods['undersampling']) == 4
+         assert len(methods['combination']) == 2
+
+         # Total should be 11
+         total = (len(methods['oversampling']) +
+                  len(methods['undersampling']) +
+                  len(methods['combination']))
+         assert total == 11
+
+     def test_method_validation(self):
+         """Test 3: Method validation works correctly"""
+         balancer = DataBalancer()
+
+         # Valid methods
+         assert balancer.is_valid_method('ros') == True
+         assert balancer.is_valid_method('smote') == True
+         assert balancer.is_valid_method('clustercentroids') == True
+         assert balancer.is_valid_method('smoteenn') == True
+
+         # Invalid methods
+         assert balancer.is_valid_method('invalid') == False
+         assert balancer.is_valid_method('') == False
+
+     def test_all_oversampling_methods(self, sample_data, mock_config):
+         """Test 4: All 5 oversampling methods work"""
+         df_train, feats_train = sample_data
+         balancer = DataBalancer(random_state=42)
+
+         oversampling_methods = ['ros', 'smote', 'adasyn', 'borderlinesmote', 'svmsmote']
+
+         for method in oversampling_methods:
+             print(f"Testing oversampling: {method}")
+
+             balanced_df, balanced_features = balancer.balance_features(
+                 df_train=df_train,
+                 feats_train=feats_train,
+                 target_column='target',
+                 method=method
+             )
+
+             # Basic checks
+             assert len(balanced_df) >= len(df_train), f"{method} should increase/maintain size"
+             assert len(balanced_df) == len(balanced_features), f"{method} length mismatch"
+             assert balanced_features.shape[1] == feats_train.shape[1], f"{method} feature dim changed"
+
+             print(f"✓ {method} passed")
+
+     def test_all_undersampling_methods(self, sample_data, mock_config):
+         """Test 5: All 4 undersampling methods work"""
+         df_train, feats_train = sample_data
+         balancer = DataBalancer(random_state=42)
+
+         undersampling_methods = ['clustercentroids', 'randomundersampler',
+                                  'editednearestneighbours', 'tomeklinks']
+
+         for method in undersampling_methods:
+             print(f"Testing undersampling: {method}")
+
+             balanced_df, balanced_features = balancer.balance_features(
+                 df_train=df_train,
+                 feats_train=feats_train,
+                 target_column='target',
+                 method=method
+             )
+
+             # Basic checks
+             assert len(balanced_df) <= len(df_train), f"{method} should decrease/maintain size"
+             assert len(balanced_df) == len(balanced_features), f"{method} length mismatch"
+             assert balanced_features.shape[1] == feats_train.shape[1], f"{method} feature dim changed"
+
+             print(f"✓ {method} passed")
+
+     def test_all_combination_methods(self, sample_data, mock_config):
+         """Test 6: All 2 combination methods work"""
+         df_train, feats_train = sample_data
+         balancer = DataBalancer(random_state=42)
+
+         combination_methods = ['smoteenn', 'smotetomek']
+
+         for method in combination_methods:
+             print(f"Testing combination: {method}")
+
+             balanced_df, balanced_features = balancer.balance_features(
+                 df_train=df_train,
+                 feats_train=feats_train,
+                 target_column='target',
+                 method=method
+             )
+
+             # Basic checks
+             assert len(balanced_df) == len(balanced_features), f"{method} length mismatch"
+             assert balanced_features.shape[1] == feats_train.shape[1], f"{method} feature dim changed"
+             assert len(balanced_df) > 0, f"{method} resulted in empty dataset"
+
+             print(f"✓ {method} passed")
+
+     def test_all_11_methods_comprehensive(self, sample_data, mock_config):
+         """Test 7: All 11 methods work in one comprehensive test"""
+         df_train, feats_train = sample_data
+         balancer = DataBalancer(random_state=42)
+
+         # Get all methods from the balancer itself
+         all_methods = balancer.get_supported_methods()
+
+         successful_methods = []
+         failed_methods = []
+
+         print("Testing all 11 balancing methods...")
+
+         for category, methods in all_methods.items():
+             for method in methods:
+                 try:
+                     balanced_df, balanced_features = balancer.balance_features(
+                         df_train=df_train,
+                         feats_train=feats_train,
+                         target_column='target',
+                         method=method
+                     )
+
+                     # Verify results
+                     assert len(balanced_df) == len(balanced_features)
+                     assert balanced_features.shape[1] == feats_train.shape[1]
+                     assert len(balanced_df) > 0
+
+                     successful_methods.append(method)
+                     print(f"✓ {method} succeeded")
+
+                 except Exception as e:
+                     failed_methods.append((method, str(e)))
+                     print(f"✗ {method} failed: {str(e)}")
+
+         print(f"\nResults: {len(successful_methods)}/11 methods successful")
+         print(f"Successful: {successful_methods}")
+         if failed_methods:
+             print(f"Failed: {[m[0] for m in failed_methods]}")
+
+         # All 11 methods should work
+         assert len(successful_methods) == 11, f"Expected 11 successful methods, got {len(successful_methods)}"
+         assert len(failed_methods) == 0, f"Some methods failed: {failed_methods}"
+
+     def test_invalid_method_handling(self, sample_data, mock_config):
+         """Test 8: Invalid methods are handled correctly"""
+         df_train, feats_train = sample_data
+         balancer = DataBalancer(random_state=42)
+
+         # Test that invalid methods are detected by validation
+         assert balancer.is_valid_method('invalid_method') == False
+         assert balancer.is_valid_method('nonexistent') == False
+         assert balancer.is_valid_method('') == False
+
+         # Note: The actual balance_features() with invalid method calls sys.exit()
+         # This is expected behavior in the current implementation
+         print("✓ Invalid method validation works correctly")
+
+
+ def test_simple_integration():
+     """Test 9: Simple integration test without fixtures"""
+     print("Simple integration test...")
+
+     # Create simple data
+     np.random.seed(42)
+     features = np.random.randn(60, 5)
+     labels = np.array([0] * 40 + [1] * 20)  # 40 vs 20 imbalance
+
+     df_train = pd.DataFrame({'target': labels})
+
+     # Test a few key methods
+     balancer = DataBalancer(random_state=42)
+     key_methods = ['ros', 'smote', 'clustercentroids', 'randomundersampler']
+
+     for method in key_methods:
+         balanced_df, balanced_features = balancer.balance_features(
+             df_train=df_train,
+             feats_train=features,
+             target_column='target',
+             method=method
+         )
+
+         assert len(balanced_df) == len(balanced_features)
+         print(f"✓ {method} integration test passed")
+
+     print("✓ Integration test completed")
+
+
+ if __name__ == "__main__":
+     print("Running simple balancing tests...")
+     print("=" * 50)
+
+     # Run integration test
+     test_simple_integration()
+
+     print("=" * 50)
+     print("Direct test completed! Run 'pytest test_balancing.py -v' for full tests")
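The method keys exercised above mirror class names from the imbalanced-learn package, which balance.py presumably wraps; the wrapper itself is not shown in this diff. A plausible mapping, stated as an assumption for orientation only:

from imblearn.combine import SMOTEENN, SMOTETomek
from imblearn.over_sampling import (
    ADASYN,
    SMOTE,
    SVMSMOTE,
    BorderlineSMOTE,
    RandomOverSampler,
)
from imblearn.under_sampling import (
    ClusterCentroids,
    EditedNearestNeighbours,
    RandomUnderSampler,
    TomekLinks,
)

# assumed mapping of DataBalancer method keys to imbalanced-learn classes
METHOD_MAP = {
    # oversampling (5)
    "ros": RandomOverSampler,
    "smote": SMOTE,
    "adasyn": ADASYN,
    "borderlinesmote": BorderlineSMOTE,
    "svmsmote": SVMSMOTE,
    # undersampling (4)
    "clustercentroids": ClusterCentroids,
    "randomundersampler": RandomUnderSampler,
    "editednearestneighbours": EditedNearestNeighbours,
    "tomeklinks": TomekLinks,
    # combination (2)
    "smoteenn": SMOTEENN,
    "smotetomek": SMOTETomek,
}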
nkululeko/utils/util.py CHANGED
@@ -1,16 +1,18 @@
  # util.py
  import ast
  import configparser
+ import json
  import logging
  import os.path
  import pickle
  import sys

- import audeer
- import audformat
  import numpy as np
  import pandas as pd

+ import audeer
+ import audformat
+

  class Util:
      # a list of words that need not to be warned upon if default values are
@@ -92,6 +94,8 @@ class Util:
                  dir_name = "./results/"
              elif entry == "model_dir":
                  dir_name = "./models/"
+             elif entry == "cache":
+                 dir_name = "./cache/"
              else:
                  dir_name = "./store/"
          else:
@@ -102,13 +106,15 @@ class Util:
          except KeyError:
              # some default values
              if entry == "fig_dir":
-                 entryn = "./images/"
+                 entryn = "images/"
              elif entry == "res_dir":
-                 entryn = "./results/"
+                 entryn = "results/"
              elif entry == "model_dir":
-                 entryn = "./models/"
+                 entryn = "models/"
+             elif entry == "cache":
+                 entryn = "cache/"
              else:
-                 entryn = "./store/"
+                 entryn = "store/"

          # Expand image, model and result directories with run index
          if entry == "fig_dir" or entry == "res_dir" or entry == "model_dir":
@@ -328,6 +334,7 @@ class Util:
              self.logger.warning(f"WARNING: {self.caller}: {message}")
          else:
              print(f"WARNING: {message}")
+
      def debug(self, message):
          if self.logger is not None:
              self.logger.debug(f"DEBUG: {self.caller}: {message}")
@@ -505,3 +512,28 @@ class Util:
      def to_3_digits_str(self, x):
          """Given a float, return this to 3 digits as string without integer number."""
          return str(self.to_3_digits(x))[1:]
+
+     def save_json(self, file: str, var: dict):
+         """Save variable to json file.
+
+         Args:
+             file: path to json file
+             var: dictionary to store
+
+         """
+         with open(file, "w", encoding="utf-8") as fp:
+             json.dump(var, fp, ensure_ascii=False, indent=2)
+
+     def read_json(self, file: str) -> object:
+         """Read variable from json file.
+
+         Args:
+             file: path to json file
+
+         Returns:
+             content of json file
+
+         """
+         with open(file, "r") as fp:
+             return json.load(fp)
+
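Besides the import reshuffle, util.py gains a "cache" entry in get_path and two small JSON helpers. A hedged usage sketch, assuming an already constructed Util instance named util (how Util is instantiated is unchanged by this diff); the file path and values are illustrative:

# illustrative values only
results = {"run": 0, "uar": 0.712, "acc": 0.745}

util.save_json("results/best_results.json", results)    # written with indent=2, UTF-8
restored = util.read_json("results/best_results.json")
assert restored == results

cache_dir = util.get_path("cache")  # new entry, defaults to a cache/ directory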
{nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nkululeko
- Version: 0.94.3
+ Version: 0.95.1
  Summary: Machine learning audio prediction experiments based on templates
  Home-page: https://github.com/felixbur/nkululeko
  Author: Felix Burkhardt
{nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/RECORD CHANGED
@@ -2,44 +2,46 @@ examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
  nkululeko/aug_train.py,sha256=wpiHCJ7zsW38kumg3ypwXZe2HQrhUblAnv7P2QeJnAc,3525
  nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
+ nkululeko/balance.py,sha256=r7opXbrqAipm2euPPaOmLlA5J10p2bHQgO5kWk2x9ro,8702
  nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
- nkululeko/constants.py,sha256=KCqkmtwj--gcAdaRwj_Zb44_ewVNp06Hfp8-YGDG8iI,39
+ nkululeko/constants.py,sha256=9E1ltDzIxGnwuxdRBW6OUWwJB8Im9_c4dnOUwjcDDr8,39
  nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
  nkululeko/demo.py,sha256=tu7Al2l5MCLVegkDC-NE2wcuc_YE7NRbgOlPW3yhGEs,4940
  nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
  nkululeko/demo_predictor.py,sha256=lDF-xOxRdEAclOmbepAYg-BQXQdGkHfq2n74PTIoop8,4872
  nkululeko/ensemble.py,sha256=71V-rre61H3J4sh7lu-OTo4I2_g7mm_rQxwW1ARDHgY,12782
- nkululeko/experiment.py,sha256=xZQ3SpFhH4QByRzVBCO4Ps84KDXKuVPZ_qUzLUPgN5g,36221
- nkululeko/explore.py,sha256=FPM2CS-LKgcDV-LnjYlD6pEv7HuCQpH_C3KyyiOCdk4,3589
+ nkululeko/experiment.py,sha256=hdFvRA7EoQz10nId9MwcbYOTz2ifYeGrFKVJOv9a88Q,38394
+ nkululeko/explore.py,sha256=aDVHwuo-lkih7VZrbb_zFKg5fowSrAIcx0V9wf0SRGo,4175
  nkululeko/export.py,sha256=U-V4acxtuL6qKt6oAsVcM5TTeWogYUJ3GU-lA6rq6d4,4336
  nkululeko/feature_extractor.py,sha256=X6ZWDjGwUMVwnP6TkCEnw8B4xo8eWvJa1QT9-0WUuvA,4102
  nkululeko/file_checker.py,sha256=xJY0Q6w47pnmgJVK5rcAKPYBrCpV7eBT4_3YBzTx-H8,3454
  nkululeko/filter_data.py,sha256=4sGrKvMZ_hLnJPrHm_CqjDPKIRV8REWoT7nfSYGXbwo,7305
  nkululeko/fixedsegment.py,sha256=Tb92QiuiyMsOO3WRWwuGjZGibS8hbHHCrcWAXGk7g04,2868
- nkululeko/glob_conf.py,sha256=KL9YJQTHvTztxo1vr25qRRgaPnx4NTg0XrdbovKGMmw,525
- nkululeko/modelrunner.py,sha256=NpDgXfKkn8dOrQzhUiEfGI56Qrb1sOtWTD31II4Zgbk,11550
+ nkululeko/glob_conf.py,sha256=NLFh-1_I0Wdfo2EnSq1Oppx23AX6jAUpgFbk2zqZJ24,659
+ nkululeko/modelrunner.py,sha256=OFN18uG84iJyjNVWjcvDpqbcBrmylziXCakUTNE2-ZQ,10530
  nkululeko/multidb.py,sha256=sO6OwJn8sn1-C-ig3thsIL8QMWHdV9SnJhDodKjeKrI,6876
  nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
- nkululeko/nkululeko.py,sha256=FaLimlbx47rJgWgDEd0ZROAiXy2cOypliVdqJn-Bvws,2257
- nkululeko/plots.py,sha256=i9VIkviBWLgncfnyK44TUMzg2Xa0_UhfL0LnMF1vHTw,27022
- nkululeko/predict.py,sha256=MLnHEyFmSiHLLs-HDczag8Vu3zKF5T1rXLKdZZJ6py8,2083
+ nkululeko/nkululeko.py,sha256=6ALPMMIz6l0O3IRaP0q4b59ZUxpfzNqLQUqZMf5t3Zo,1976
+ nkululeko/plots.py,sha256=lUxgyoriYTwdpHZvBBQ4e41v77deQrt0PcRDLJWijys,27503
+ nkululeko/predict.py,sha256=PWv1Pc39lrxqqIWrYszVk5SL37dDL93CHgcruItNID8,2211
  nkululeko/resample.py,sha256=rn3-M1A-iwVGibfQNGyeYNa7briD24lIN9Szq_1uTJo,5194
  nkululeko/runmanager.py,sha256=YtGQP0UyyQTKkilncB1XYM-T8oatzGcZEOcj5SorjJw,8902
  nkululeko/scaler.py,sha256=a4lKwWT436TV4VEvqtP1uQ58Yz67XVHr1HjO5gp3xLI,5109
  nkululeko/segment.py,sha256=7UrJEwdLmh9wDL5iBwpdJyJm9dwSxidHrHt-_D2qtxw,4949
  nkululeko/syllable_nuclei.py,sha256=5w_naKxNxz66a_qLkraemi2fggM-gWesiiBPS47iFcE,9931
  nkululeko/test.py,sha256=1w624vo5KTzmFC8BUStGlLDmIEAFuJUz7J0W-gp7AxI,1677
- nkululeko/test_predictor.py,sha256=RPQxorj1uygLeSnAuNims5CFccXwrDwTnrIDs-gDlNQ,2859
+ nkululeko/test_predictor.py,sha256=i8vSaB8OOrdELoDttQVMs2Bc-fUOi2C5ANqnt32K3Zk,3064
  nkululeko/test_pretrain.py,sha256=6FZeETlWzg9Cq_sn3BFKhfH91jW26nAIDm1bJkInNNA,8463
  nkululeko/augmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/augmenting/augmenter.py,sha256=TUUznEz0pe9DSMC9r7LoBckuvsJTprvypeV5-8zLn20,2846
  nkululeko/augmenting/randomsplicer.py,sha256=TQTy4RBt6XbWiuUu5Ic913DMvmwTUwEufldBJjo7i1s,2801
  nkululeko/augmenting/randomsplicing.py,sha256=GXCpCDdOsOyWACDJ3ujmFZBVe6ISvkoQLefBNPgxxow,1750
- nkululeko/augmenting/resampler.py,sha256=j2yuB9h9UwGQHqwF8CZPSGqAfOiyQV3979WQjU2toVM,3962
+ nkululeko/augmenting/resampler.py,sha256=c5AjohxomX8ujOoJRnLZoNe1fxY8Fdw6LAdFm9KDy78,4020
  nkululeko/autopredict/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/autopredict/ap_age.py,sha256=yzd8sF6gi0hnqNawyLBCIkt-pKgl9gYPlZHsrLGfz0U,1098
  nkululeko/autopredict/ap_arousal.py,sha256=lpv3jTSVEVCcR226JevNM6S7e0_uMZXHb_8Wpup1yj8,1027
  nkululeko/autopredict/ap_dominance.py,sha256=Ltq5x0ralxU1758_e-nNKvzexiPUM66xLAm3Wo2B07c,1040
+ nkululeko/autopredict/ap_emotion.py,sha256=1efW3cQjwc804Pf2aYU-XfjYtYXtZdyeiXtWL439x6o,1030
  nkululeko/autopredict/ap_gender.py,sha256=RjLv9YxY9OPHT_gnd6htjKQzQA4DSKcbjipKGjHHx2A,1011
  nkululeko/autopredict/ap_mos.py,sha256=PMLU67JDgYQMobRSR2vW9cWoL3QK5JbhLM65fVsRGkc,1108
  nkululeko/autopredict/ap_pesq.py,sha256=EuJ9u6oaSPWdYsaU8q3t8tiFKhfW1qdqgO-cySpfxw0,1141
@@ -47,28 +49,32 @@ nkululeko/autopredict/ap_sdr.py,sha256=xYCy4M_aWzoFiYD_KOK9ys2P2v0bfxNkLcIRdi5z2
  nkululeko/autopredict/ap_sid.py,sha256=b_JwVWlqcwdC7acU9Q7mExuOJKUn6qdlmQTm8pmmptk,2642
  nkululeko/autopredict/ap_snr.py,sha256=cjc0pUsCN_RacTw1UBR2cGY9t_um48f2cjo3QJDn7bw,1111
  nkululeko/autopredict/ap_stoi.py,sha256=csv9qCcRmieHAhypszqGoGt9r3biM8IYPgcTwp9GIFM,1188
+ nkululeko/autopredict/ap_text.py,sha256=zaz9qIg90-ghZhBe1ka0HoUnap6s6RyopUKoCpttHOU,1333
  nkululeko/autopredict/ap_valence.py,sha256=9S06SpO_zXKSpkf0InHYYXZcD9HDGoCJ6UPkn__eBAg,1027
  nkululeko/autopredict/estimate_snr.py,sha256=1k9-XadABudnsNOeFZD_Fg0E64-GUQVS7JEp82MLQS4,4995
+ nkululeko/autopredict/whisper_transcriber.py,sha256=DWDvpRaV5KmUF18ojPEvxnVXm_h_nWyY-TfW2Ngd5N8,2941
+ nkululeko/autopredict/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ nkululeko/autopredict/tests/test_whisper_transcriber.py,sha256=ilas6j3OUvq_xnQCRZgytQCtyrpNU6tvG5a8kPvVKBQ,5085
  nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/data/dataset.py,sha256=JLbBYGniUrjwxs-HtbIyhqO3Cv-ELfpmlq7jzij4dBc,41759
  nkululeko/data/dataset_csv.py,sha256=AIbtB6pGk5BSQGIgfokZ7tEGFjmuOq5w2XumRSimVWs,4833
  nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/feat_extract/feats_agender.py,sha256=onfAQ6-xx_mFMJXEF1IX8cHBmGtGeX6weJmxbkfh1_o,3184
  nkululeko/feat_extract/feats_agender_agender.py,sha256=_YQv1qw--3uQfnyTQDCwlmPRnrhdMhgXbYK2yQtseW0,3464
- nkululeko/feat_extract/feats_analyser.py,sha256=txuIEgO4uprle35RzBczvZm5Hc7iUl2p9oBEfdrvg_I,13506
+ nkululeko/feat_extract/feats_analyser.py,sha256=lodim7qQ8M7c3iMeJ5bHQ-nCy9Cehx1Xl5K3leii6-w,14768
  nkululeko/feat_extract/feats_ast.py,sha256=w62xEoLiFtU-rj6SXkqXAktmoFaXcAcAWpUyEjp8JWo,4652
  nkululeko/feat_extract/feats_auddim.py,sha256=CGLp_aYhudfwoU5522vjrvjPxfZcyw593A8xLjYefV8,3134
  nkululeko/feat_extract/feats_audmodel.py,sha256=OsZyB1rdcG0Fai2gAwBlbuubmWor1_-P4IDkZLqgPKE,3161
  nkululeko/feat_extract/feats_clap.py,sha256=1tttpfm2SJmQgYm2u8eUVpDiDOpWdKqFChpY3ZZokNs,3395
- nkululeko/feat_extract/feats_emotion2vec.py,sha256=ObVlqbsJsw-hWGsUOXY68Ebynt5Bn4Xtlu_Gvq3XJI4,8728
+ nkululeko/feat_extract/feats_emotion2vec.py,sha256=LnV8xEg7L7HIDqz0ulqUNoaAHBU0d5gyQPb2_32T_18,9694
  nkululeko/feat_extract/feats_hubert.py,sha256=F3vrPCkx8EimJjFWYCZ7Yg9uo1G3NjYt4UKrGIUev8k,5172
  nkululeko/feat_extract/feats_import.py,sha256=cPi4XRuRs71npB8YGXr7rYOvkeTU_oZEl3GrGncdiqY,2222
  nkululeko/feat_extract/feats_mld.py,sha256=5aRoYiGDm5ApoFntxAMQYPjEelXHHRBHZcAJR9dxaeI,1945
  nkululeko/feat_extract/feats_mos.py,sha256=vkH1FdXtduoU0-yjBtVccC2b_p_eyH8laRnwlL7QTVM,4136
- nkululeko/feat_extract/feats_opensmile copy.py,sha256=BLj5sUaBPz7vLPfNlt9LdQurSypmViqgSpPK-6aXGhQ,4029
  nkululeko/feat_extract/feats_opensmile.py,sha256=HwbGs0EaPxZ7DznQZFem8RYgyQWz02oya77uVY7KhZE,9203
  nkululeko/feat_extract/feats_oxbow.py,sha256=TRoEJx5EKZiqoPoPRibHc0vkBMoZcKlGoGNq4NbyHZw,4895
- nkululeko/feat_extract/feats_praat.py,sha256=jZ-XXbP3iy25QQIzA4Hrv0HxsYvJNPavoCW2FyJNKMg,3064
+ nkululeko/feat_extract/feats_praat.py,sha256=3j1xySKqW74USjk8DweWAajHeTcuszKCFY1htQhe1cY,3070
+ nkululeko/feat_extract/feats_praat_core.py,sha256=Q0OVuo5h38a860yflzRtUpy0J0w7WCg0aBLrDhIskFc,28524
  nkululeko/feat_extract/feats_snr.py,sha256=Zxwo78HLleNsziYLOj34RQUnp9I7r1yMXqjYipDOjZw,2761
  nkululeko/feat_extract/feats_spectra.py,sha256=6WhFUpB0WTutg7OFMlAw9lSwVU5OBYCDcPRxaiH-Qn8,3621
  nkululeko/feat_extract/feats_spkrec.py,sha256=o_6bdU4lIkj64S5Kdjf1iyuo1VASeYxE4XdxV94a8gE,4732
@@ -78,13 +84,15 @@ nkululeko/feat_extract/feats_wav2vec2.py,sha256=q1QzMD3KbhF2SOmxdwI7CiViRmhlFRyg
  nkululeko/feat_extract/feats_wavlm.py,sha256=O9cfc39VF5aPJRRATKb37pHT4W11i2cu5O1mY9LOjIA,4755
  nkululeko/feat_extract/feats_whisper.py,sha256=n3ESZtva7wshs8E8diBlQYa9xCH_P0UY1DncSrxz-FY,4508
  nkululeko/feat_extract/featureset.py,sha256=clcBv9rzBRW-bfw7JC_FYTjU5uUS-c0UE1XtQLYYRiE,1615
- nkululeko/feat_extract/feinberg_praat.py,sha256=bgzWtQkKbgcygrzwAxDXosui1rcc38qhWuJq9GLr0z8,21308
  nkululeko/feat_extract/transformer_feature_extractor.py,sha256=LaXuW-AJZ931ttLis0J5h9N3RtiiE51BnkxJR-bubfY,5837
+ nkululeko/feat_extract/tests/__init__.py,sha256=pzjkYs1PNo7107jIXKa_xwdBR2SKxzkg53a9W3bvbpw,32
+ nkululeko/feat_extract/tests/test_feats_opensmile.py,sha256=eYjGBsH6UkuRleKzGZHNv2cXRZz2xPCw0dkTfXw5S9s,5761
+ nkululeko/feat_extract/tests/test_feats_praat_core.py,sha256=ntbpIrehr4D-lOvaE0hNCe-og5sN4syBGBUTuNGZpDo,20916
  nkululeko/losses/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/losses/loss_ccc.py,sha256=NOK0y0fxKUnU161B5geap6Fmn8QzoPl2MqtPiV8IuJE,976
  nkululeko/losses/loss_softf1loss.py,sha256=5gW-PuiqeAZcRgfwjueIOQtMokOjZWgQnVIv59HKTCo,1309
  nkululeko/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nkululeko/models/model.py,sha256=0O6H-kME1yVHU-EKu5iOZVBB7fFNg3lfagvGgMrldxM,14426
+ nkululeko/models/model.py,sha256=2STBD3jtLKeNSk7arCFJdaV6FL-nuLR1qpsjvZ4W-9A,12975
  nkululeko/models/model_bayes.py,sha256=tQUXEsXoS6WnfapQjP78S_gxNBssTOqE78A2iG8SfLU,407
  nkululeko/models/model_cnn.py,sha256=TKj43865epsiK7a0COyfBDaFHKOYgWgnPpMVCPWUhCM,10497
  nkululeko/models/model_gmm.py,sha256=mhHFNtTzHuJvqYSA0h5YhvjA--KhnN6MTU_S0G3-d1c,1332
@@ -97,30 +105,35 @@ nkululeko/models/model_svm.py,sha256=zP8ykLhCZTYvwSqw06XHuzq9qMBtsiYpxjUpWDAnMyA
  nkululeko/models/model_svr.py,sha256=FEwYRdgqwgGhZdkpRnT7Ef12lklWi6GZL28PyV99xWs,726
  nkululeko/models/model_tree.py,sha256=6L3PD3aIiiQz1RPWS6z3Edx4f0gnR7AOfBKOJzf0BNU,433
  nkululeko/models/model_tree_reg.py,sha256=IMaQpNImoRqP8Biw1CsJevxpV_PVpKblsKtYlMW5d_U,429
- nkululeko/models/model_tuned.py,sha256=VuRyNqw3XTpQ2eHsWOJN8X-V98AN8Wqiq7UgwT5BQRU,23763
- nkululeko/models/model_xgb.py,sha256=zfZM3lqH5uttVB18b1MRIhP9CCeCuIh1ycgOuFMcqUM,449
+ nkululeko/models/model_tuned.py,sha256=74c_pQUtpx_x8bM3r5ufuqhaaQxfy6KRUqirdzSac-Q,35999
+ nkululeko/models/model_xgb.py,sha256=_VxFFP1QcoyxrwvJSrzdIwwDt85IulUWvg1BxXBgN1Y,6616
  nkululeko/models/model_xgr.py,sha256=H01FJCRgmX2unvambMs5TTCS9sI6VDB9ip9G6rVGt2c,419
  nkululeko/models/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ nkululeko/models/tests/test_model_knn.py,sha256=hFCJ0C0taQO-fwA7j8HcFrwCSluSb6Vg4NCQQ_zL4bc,1793
+ nkululeko/models/tests/test_model_mlp.py,sha256=XVvniKAtroxLRKyYGW-ew1mHuRo3_cWk4nGnXQ5aDEk,4977
  nkululeko/models/tests/test_model_svm.py,sha256=spDlZmeBKBdK4EFBpOgEkaAfGeGH9kau6CqSWOY6Uag,1856
+ nkululeko/models/tests/test_model_xgb.py,sha256=-Rz5YTeqUJ4Kwdh5ny31c3zxsUJXTypR4L3ItoOU7yU,1036
  nkululeko/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/reporting/defines.py,sha256=0vh-Tlx4fAPpk1o6mP_4x3EkIoqzYMr38IZnj-JM5z4,641
  nkululeko/reporting/latex_writer.py,sha256=NGwSIfd4nfslDkNUOSZSdqY_VDLA8634thyhe-vj1bY,1824
- nkululeko/reporting/report.py,sha256=bYN8B66gg3IWHAyfd6uIVjpYKy3rOI6aEwgfXU0LSAY,1006
+ nkululeko/reporting/report.py,sha256=B5eoIKMz46VKDBsi7M9u_iegzAD-E3eGCmolzSFjZ3c,1118
  nkululeko/reporting/report_item.py,sha256=drkknsyFhGviaPJNmPQtCXJmRhTSSfjNcJt0Bls6JAA,533
- nkululeko/reporting/reporter.py,sha256=-VyV0TZ0vBAx6UZNegnKS3i3WpkF27ntBRlYvp9NNiQ,20174
+ nkululeko/reporting/reporter.py,sha256=e-piNtnv0QUWKs9Ha_d4CzgqJxPBG9XBm3Ru8y0ot-U,20896
  nkululeko/reporting/result.py,sha256=G63a2tHCwHhM6NBJgYzsWKWJm4Yu3r4hsCHA2Km7eHU,1073
  nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=b3t0zdpJYofKWMyKRMtMMX91xeR-k8d5pbnNaQHcsOE,1902
  nkululeko/segmenting/seg_pyannote.py,sha256=6IPbgjnGOz9juzEKDTZN3PSipX4t6Mz-DILAx3rp5do,4216
  nkululeko/segmenting/seg_silero.py,sha256=ulodnvtRq5MLHDxy_RmAK4tJg6h1d-mPq-uCPFkGVKg,4258
+ nkululeko/tests/__init__.py,sha256=XzD6C-ZuewsccUwx7KzEUtUxJrRx2d7sPFViscjf1O0,30
+ nkululeko/tests/test_balancing.py,sha256=21110R77iTcSWKiSTxYDkJ26lxPFTlZf_ZwVjeiSh4w,10164
  nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/utils/files.py,sha256=SrrYaU7AB80MZHiV1jcB0h_zigvYLYgSVNTXV4ao38g,4593
  nkululeko/utils/stats.py,sha256=3Fyx8q8BSKYmiufT6OkRug9RATWmGrr9BaX_y8jziWo,3074
  nkululeko/utils/unzip.py,sha256=G68f5120TjwACZC3bQcneMniddnwubPbBdMc2L5KBOo,1206
- nkululeko/utils/util.py,sha256=6NDKhOx0fV5fKyhSoY4hem96p7OuPcmhCDQR9EzkQhw,17829
- nkululeko-0.94.3.dist-info/licenses/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
- nkululeko-0.94.3.dist-info/METADATA,sha256=QeZ9ZMTqwgdDvwRTCvgFO7X55_J84AWZh7jVf9uV-6M,2874
- nkululeko-0.94.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- nkululeko-0.94.3.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
- nkululeko-0.94.3.dist-info/top_level.txt,sha256=bf1k1YKkqcXemNX_cUgoyKqQ3_GVErPqAY-53J36jkM,19
- nkululeko-0.94.3.dist-info/RECORD,,
+ nkululeko/utils/util.py,sha256=o62TZRcxO1VflINai6ojEzSmcbXIFInNLGogSbqJgiA,18561
+ nkululeko-0.95.1.dist-info/licenses/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+ nkululeko-0.95.1.dist-info/METADATA,sha256=KhJ1JPenNsZGUIhdeYGvNKrM1H-ioqONAh06LpxdnMQ,2874
+ nkululeko-0.95.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ nkululeko-0.95.1.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
+ nkululeko-0.95.1.dist-info/top_level.txt,sha256=bf1k1YKkqcXemNX_cUgoyKqQ3_GVErPqAY-53J36jkM,19
+ nkululeko-0.95.1.dist-info/RECORD,,
nkululeko/feat_extract/feats_opensmile copy.py DELETED
@@ -1,93 +0,0 @@
- # opensmileset.py
- import os
-
- import opensmile
- import pandas as pd
-
- import nkululeko.glob_conf as glob_conf
- from nkululeko.feat_extract.featureset import Featureset
-
-
- class Opensmileset(Featureset):
-     def __init__(self, name, data_df, feats_type=None, config_file=None):
-         super().__init__(name, data_df, feats_type)
-         self.featset = self.util.config_val("FEATS", "set", "eGeMAPSv02")
-         try:
-             self.feature_set = eval(f"opensmile.FeatureSet.{self.featset}")
-             # 'eGeMAPSv02, ComParE_2016, GeMAPSv01a, eGeMAPSv01a':
-         except AttributeError:
-             self.util.error(f"something is wrong with feature set: {self.featset}")
-         self.featlevel = self.util.config_val("FEATS", "level", "functionals")
-         try:
-             self.featlevel = self.featlevel.replace("lld", "LowLevelDescriptors")
-             self.featlevel = self.featlevel.replace("functionals", "Functionals")
-             self.feature_level = eval(f"opensmile.FeatureLevel.{self.featlevel}")
-         except AttributeError:
-             self.util.error(f"something is wrong with feature level: {self.featlevel}")
-
-     def extract(self):
-         """Extract the features based on the initialized dataset or re-open them when found on disk."""
-         store = self.util.get_path("store")
-         store_format = self.util.config_val("FEATS", "store_format", "pkl")
-         storage = f"{store}{self.name}.{store_format}"
-         extract = eval(
-             self.util.config_val("FEATS", "needs_feature_extraction", "False")
-         )
-         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
-         if extract or not os.path.isfile(storage) or no_reuse:
-             self.util.debug("extracting openSmile features, this might take a while...")
-             smile = opensmile.Smile(
-                 feature_set=self.feature_set,
-                 feature_level=self.feature_level,
-                 num_workers=self.n_jobs,
-                 verbose=True,
-             )
-             if isinstance(self.data_df.index, pd.MultiIndex):
-                 self.df = smile.process_index(self.data_df.index)
-                 self.df = self.df.set_index(self.data_df.index)
-             else:
-                 self.df = smile.process_files(self.data_df.index)
-                 self.df.index = self.df.index.droplevel(1)
-                 self.df.index = self.df.index.droplevel(1)
-             self.util.write_store(self.df, storage, store_format)
-             try:
-                 glob_conf.config["DATA"]["needs_feature_extraction"] = "False"
-             except KeyError:
-                 pass
-         else:
-             self.util.debug(f"reusing extracted OS features: {storage}.")
-             self.df = self.util.get_store(storage, store_format)
-
-     def extract_sample(self, signal, sr):
-         smile = opensmile.Smile(
-             feature_set=self.feature_set,
-             feature_level=opensmile.FeatureLevel.Functionals,
-         )
-         feats = smile.process_signal(signal, sr)
-         return feats.to_numpy()
-
-     # def filter(self):
-     #     # use only the features that are indexed in the target dataframes
-     #     self.df = self.df[self.df.index.isin(self.data_df.index)]
-     #     try:
-     #         # use only some features
-     #         selected_features = ast.literal_eval(
-     #             glob_conf.config["FEATS"]["os.features"]
-     #         )
-     #         self.util.debug(f"selecting features from opensmile: {selected_features}")
-     #         sel_feats_df = pd.DataFrame()
-     #         hit = False
-     #         for feat in selected_features:
-     #             try:
-     #                 sel_feats_df[feat] = self.df[feat]
-     #                 hit = True
-     #             except KeyError:
-     #                 pass
-     #         if hit:
-     #             self.df = sel_feats_df
-     #             self.util.debug(
-     #                 "new feats shape after selecting opensmile features:"
-     #                 f" {self.df.shape}"
-     #             )
-     #     except KeyError:
-     #         pass