nkululeko 0.95.2__py3-none-any.whl → 0.95.4__py3-none-any.whl
This diff reflects the changes between publicly released package versions as they appear in their public registries and is provided for informational purposes only.
- nkululeko/constants.py +1 -1
- nkululeko/data/dataset.py +14 -1
- nkululeko/experiment.py +1 -1
- nkululeko/modelrunner.py +10 -5
- nkululeko/models/model_mlp_regression.py +3 -0
- nkululeko/nkuluflag.py +1 -1
- nkululeko/optim.py +3 -871
- nkululeko/optimizationrunner.py +1118 -0
- nkululeko/reporting/reporter.py +9 -1
- nkululeko/{test.py → testing.py} +4 -3
- nkululeko/{test_predictor.py → testing_predictor.py} +2 -2
- nkululeko/{test_pretrain.py → testing_pretrain.py} +2 -5
- nkululeko/utils/util.py +2 -2
- {nkululeko-0.95.2.dist-info → nkululeko-0.95.4.dist-info}/METADATA +2 -2
- {nkululeko-0.95.2.dist-info → nkululeko-0.95.4.dist-info}/RECORD +19 -18
- {nkululeko-0.95.2.dist-info → nkululeko-0.95.4.dist-info}/WHEEL +0 -0
- {nkululeko-0.95.2.dist-info → nkululeko-0.95.4.dist-info}/entry_points.txt +0 -0
- {nkululeko-0.95.2.dist-info → nkululeko-0.95.4.dist-info}/licenses/LICENSE +0 -0
- {nkululeko-0.95.2.dist-info → nkululeko-0.95.4.dist-info}/top_level.txt +0 -0
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
-VERSION="0.95.2"
+VERSION="0.95.4"
 SAMPLING_RATE = 16000
nkululeko/data/dataset.py
CHANGED
@@ -6,7 +6,6 @@ from random import sample
 
 import numpy as np
 import pandas as pd
-
 import audformat
 
 from nkululeko.filter_data import DataFilter
@@ -905,6 +904,20 @@ class Dataset:
         mappings = self.util.config_val_data(self.name, "mapping", False)
         if mappings:
             mapping = ast.literal_eval(mappings)
+            # mapping should be a dictionary, the keys might encode lists.
+            keys = list(mapping.keys())
+            for key in keys:
+                # a comma in the key means that the key is a list of labels
+                if "," in key:
+                    # split the key and create a list
+                    key_list = [k.strip() for k in key.split(",")]
+                    # create a new mapping for each key
+                    for k in key_list:
+                        mapping[k] = mapping[key]
+                    # remove the old key
+                    del mapping[key]
+            if pd.api.types.is_numeric_dtype(df[target]):
+                df[target] = df[target].astype("string")
             df[target] = df[target].map(mapping)
             # remove any exiting nan values after mapping
             df = df.dropna()
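For illustration, a minimal sketch of what the added comma-key expansion does; the emotion labels and mapping values below are made up, only the expansion logic mirrors the diff above.

import ast

import pandas as pd

# Hypothetical mapping string as it might appear in a config value: a key with
# commas stands for several source labels that share one target label.
mappings = "{'anger, disgust, fear': 'negative', 'happiness': 'positive'}"
mapping = ast.literal_eval(mappings)

# Expand comma-separated keys into one entry per label, as in the added code.
for key in list(mapping.keys()):
    if "," in key:
        for k in (part.strip() for part in key.split(",")):
            mapping[k] = mapping[key]
        del mapping[key]

df = pd.DataFrame({"emotion": ["anger", "fear", "happiness"]})
print(df["emotion"].map(mapping).tolist())  # ['negative', 'negative', 'positive']

The second added branch casts numeric targets to strings before mapping, so integer-coded labels can be remapped with string keys.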
nkululeko/experiment.py
CHANGED
@@ -23,7 +23,7 @@ from nkululeko.plots import Plots
 from nkululeko.reporting.report import Report
 from nkululeko.runmanager import Runmanager
 from nkululeko.scaler import Scaler
-from nkululeko.test_predictor import TestPredictor
+from nkululeko.testing_predictor import TestPredictor
 from nkululeko.utils.util import Util
 
 
nkululeko/modelrunner.py
CHANGED
@@ -250,14 +250,19 @@ class Modelrunner:
         balancing = self.util.config_val("FEATS", "balancing", False)
         if balancing:
             self.util.debug("Applying feature balancing using DataBalancer")
-
-            #
-
-
+
+            # Get random state from config, fallback to 42 for backward compatibility
+            random_state = int(
+                self.util.config_val("FEATS", "balancing_random_state", 42)
+            )
+
+            # Initialize the data balancer with configurable random state
+            balancer = DataBalancer(random_state=random_state)
+
             # Apply balancing
             self.df_train, self.feats_train = balancer.balance_features(
                 df_train=self.df_train,
                 feats_train=self.feats_train,
                 target_column=self.target,
-                method=balancing
+                method=balancing,
             )
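A small sketch of the new option's effect; the configparser usage and the "smote" balancing value are assumptions, only the [FEATS] section, the balancing_random_state key, and the 42 fallback come from the diff above.

import configparser

# Hypothetical INI fragment; "balancing = smote" is an assumed example value.
ini = """
[FEATS]
balancing = smote
balancing_random_state = 7
"""

config = configparser.ConfigParser()
config.read_string(ini)

# Mirrors the fallback in the diff: use the configured seed if present,
# otherwise default to 42 for backward compatibility.
random_state = int(config.get("FEATS", "balancing_random_state", fallback=42))
print(random_state)  # 7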
nkululeko/models/model_mlp_regression.py
CHANGED
@@ -174,6 +174,9 @@ class MLP_Reg_model(Model):
             logits = model(features.to(device)).reshape(-1)
             loss = self.criterion(logits, labels.to(device))
             # print(f'loss: {loss.item()}')
+            if torch.isnan(loss):
+                # possible that ccc returns NaN if batch contains only one value
+                continue
             losses.append(loss.item())
             optimizer.zero_grad()
             loss.backward()
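To see why the guard helps, a standalone sketch (not the nkululeko CCC implementation): with a single-element batch, the unbiased variance is undefined, so a CCC-style loss can evaluate to NaN and would poison the running loss and gradients.

import torch

preds = torch.tensor([0.7])
labels = torch.tensor([0.5])

# Unbiased variance of a single value is NaN, so the CCC denominator is NaN.
ccc = (2 * ((preds - preds.mean()) * (labels - labels.mean())).mean()) / (
    preds.var() + labels.var() + (preds.mean() - labels.mean()) ** 2
)
loss = 1 - ccc
print(loss)  # tensor(nan)

# Same guard as in the diff: skip the batch instead of propagating NaN.
if torch.isnan(loss):
    print("skipping batch")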