nkululeko 0.89.2__py3-none-any.whl → 0.90.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. nkululeko/aug_train.py +6 -4
  2. nkululeko/augment.py +8 -6
  3. nkululeko/augmenting/augmenter.py +4 -4
  4. nkululeko/augmenting/randomsplicer.py +12 -9
  5. nkululeko/augmenting/randomsplicing.py +2 -3
  6. nkululeko/augmenting/resampler.py +9 -6
  7. nkululeko/autopredict/ap_age.py +4 -2
  8. nkululeko/autopredict/ap_arousal.py +4 -2
  9. nkululeko/autopredict/ap_dominance.py +3 -2
  10. nkululeko/autopredict/ap_gender.py +4 -2
  11. nkululeko/autopredict/ap_mos.py +5 -2
  12. nkululeko/autopredict/ap_pesq.py +5 -2
  13. nkululeko/autopredict/ap_sdr.py +5 -2
  14. nkululeko/autopredict/ap_snr.py +5 -2
  15. nkululeko/autopredict/ap_stoi.py +5 -2
  16. nkululeko/autopredict/ap_valence.py +4 -2
  17. nkululeko/autopredict/estimate_snr.py +10 -14
  18. nkululeko/cacheddataset.py +1 -1
  19. nkululeko/constants.py +1 -1
  20. nkululeko/data/dataset.py +19 -16
  21. nkululeko/data/dataset_csv.py +5 -3
  22. nkululeko/demo-ft.py +29 -0
  23. nkululeko/demo_feats.py +5 -4
  24. nkululeko/demo_predictor.py +3 -4
  25. nkululeko/ensemble.py +27 -28
  26. nkululeko/experiment.py +11 -7
  27. nkululeko/experiment_felix.py +728 -0
  28. nkululeko/explore.py +1 -0
  29. nkululeko/export.py +7 -5
  30. nkululeko/feat_extract/feats_agender.py +5 -4
  31. nkululeko/feat_extract/feats_agender_agender.py +7 -6
  32. nkululeko/feat_extract/feats_analyser.py +18 -16
  33. nkululeko/feat_extract/feats_ast.py +9 -8
  34. nkululeko/feat_extract/feats_auddim.py +3 -5
  35. nkululeko/feat_extract/feats_audmodel.py +2 -2
  36. nkululeko/feat_extract/feats_clap.py +9 -12
  37. nkululeko/feat_extract/feats_hubert.py +2 -3
  38. nkululeko/feat_extract/feats_import.py +5 -4
  39. nkululeko/feat_extract/feats_mld.py +3 -5
  40. nkululeko/feat_extract/feats_mos.py +4 -3
  41. nkululeko/feat_extract/feats_opensmile.py +4 -3
  42. nkululeko/feat_extract/feats_oxbow.py +5 -4
  43. nkululeko/feat_extract/feats_praat.py +4 -7
  44. nkululeko/feat_extract/feats_snr.py +3 -5
  45. nkululeko/feat_extract/feats_spectra.py +8 -9
  46. nkululeko/feat_extract/feats_spkrec.py +6 -11
  47. nkululeko/feat_extract/feats_squim.py +2 -4
  48. nkululeko/feat_extract/feats_trill.py +2 -5
  49. nkululeko/feat_extract/feats_wav2vec2.py +8 -4
  50. nkululeko/feat_extract/feats_wavlm.py +2 -3
  51. nkululeko/feat_extract/feats_whisper.py +4 -6
  52. nkululeko/feat_extract/featureset.py +4 -2
  53. nkululeko/feat_extract/feinberg_praat.py +1 -3
  54. nkululeko/feat_extract/transformer_feature_extractor.py +147 -0
  55. nkululeko/file_checker.py +3 -3
  56. nkululeko/filter_data.py +3 -1
  57. nkululeko/fixedsegment.py +83 -0
  58. nkululeko/models/model.py +3 -5
  59. nkululeko/models/model_bayes.py +1 -0
  60. nkululeko/models/model_cnn.py +4 -6
  61. nkululeko/models/model_gmm.py +13 -9
  62. nkululeko/models/model_knn.py +1 -0
  63. nkululeko/models/model_knn_reg.py +1 -0
  64. nkululeko/models/model_lin_reg.py +1 -0
  65. nkululeko/models/model_mlp.py +2 -3
  66. nkululeko/models/model_mlp_regression.py +1 -6
  67. nkululeko/models/model_svm.py +2 -2
  68. nkululeko/models/model_svr.py +1 -0
  69. nkululeko/models/model_tree.py +2 -3
  70. nkululeko/models/model_tree_reg.py +1 -0
  71. nkululeko/models/model_tuned.py +54 -33
  72. nkululeko/models/model_xgb.py +1 -0
  73. nkululeko/models/model_xgr.py +1 -0
  74. nkululeko/multidb.py +1 -0
  75. nkululeko/nkululeko.py +1 -1
  76. nkululeko/plots.py +1 -1
  77. nkululeko/predict.py +4 -5
  78. nkululeko/reporting/defines.py +6 -8
  79. nkululeko/reporting/latex_writer.py +3 -3
  80. nkululeko/reporting/report.py +2 -2
  81. nkululeko/reporting/report_item.py +1 -0
  82. nkululeko/reporting/reporter.py +20 -19
  83. nkululeko/resample.py +8 -12
  84. nkululeko/resample_cli.py +99 -0
  85. nkululeko/runmanager.py +3 -1
  86. nkululeko/scaler.py +1 -1
  87. nkululeko/segment.py +6 -5
  88. nkululeko/segmenting/seg_inaspeechsegmenter.py +3 -3
  89. nkululeko/segmenting/seg_silero.py +4 -4
  90. nkululeko/syllable_nuclei.py +9 -22
  91. nkululeko/test_pretrain.py +6 -7
  92. nkululeko/utils/stats.py +0 -1
  93. nkululeko/utils/util.py +2 -3
  94. {nkululeko-0.89.2.dist-info → nkululeko-0.90.1.dist-info}/METADATA +12 -2
  95. nkululeko-0.90.1.dist-info/RECORD +119 -0
  96. {nkululeko-0.89.2.dist-info → nkululeko-0.90.1.dist-info}/WHEEL +1 -1
  97. nkululeko-0.89.2.dist-info/RECORD +0 -114
  98. {nkululeko-0.89.2.dist-info → nkululeko-0.90.1.dist-info}/LICENSE +0 -0
  99. {nkululeko-0.89.2.dist-info → nkululeko-0.90.1.dist-info}/top_level.txt +0 -0
@@ -8,11 +8,10 @@ import pandas as pd
 import torch
 import torchaudio
 from tqdm import tqdm
-from transformers import Wav2Vec2FeatureExtractor
-from transformers import WavLMModel
+from transformers import Wav2Vec2FeatureExtractor, WavLMModel
 
-from nkululeko.feat_extract.featureset import Featureset
 import nkululeko.glob_conf as glob_conf
+from nkululeko.feat_extract.featureset import Featureset
 
 
 class Wavlm(Featureset):
nkululeko/feat_extract/feats_whisper.py CHANGED
@@ -1,16 +1,14 @@
 # feats_whisper.py
 import os
 
-import pandas as pd
-import torch
-from transformers import AutoFeatureExtractor
-from transformers import WhisperModel
-
 import audeer
 import audiofile
+import pandas as pd
+import torch
+from transformers import AutoFeatureExtractor, WhisperModel
 
-from nkululeko.feat_extract.featureset import Featureset
 import nkululeko.glob_conf as glob_conf
+from nkululeko.feat_extract.featureset import Featureset
 
 
 class Whisper(Featureset):
nkululeko/feat_extract/featureset.py CHANGED
@@ -1,8 +1,10 @@
 # featureset.py
+import ast
+
 import pandas as pd
-from nkululeko.utils.util import Util
+
 import nkululeko.glob_conf as glob_conf
-import ast
+from nkululeko.utils.util import Util
 
 
 class Featureset:
nkululeko/feat_extract/feinberg_praat.py CHANGED
@@ -7,6 +7,7 @@ taken June 23rd 2022.
 import math
 import statistics
 
+import audiofile
 import numpy as np
 import pandas as pd
 import parselmouth
@@ -15,9 +16,6 @@ from scipy.stats.mstats import zscore
 from sklearn.decomposition import PCA
 from tqdm import tqdm
 
-import audiofile
-
-
 # This is the function to measure source acoustics using default male parameters.
 
 
nkululeko/feat_extract/transformer_feature_extractor.py ADDED
@@ -0,0 +1,147 @@
+# transformer_feature_extractor.py
+
+import os
+
+import pandas as pd
+import torch
+import torchaudio
+from tqdm import tqdm
+
+import nkululeko.glob_conf as glob_conf
+from nkululeko.feat_extract.featureset import Featureset
+
+
+class TransformerFeatureExtractor(Featureset):
+    def __init__(self, name, data_df, feat_type):
+        super().__init__(name, data_df, feat_type)
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
+        self.model_initialized = False
+        self.feat_type = feat_type
+
+    def init_model(self):
+        raise NotImplementedError("Subclasses must implement init_model method")
+
+    def get_embeddings(self, signal, sampling_rate, file):
+        try:
+            with torch.no_grad():
+                # Preprocess the input
+                inputs = self.preprocess_input(signal, sampling_rate)
+
+                # Get model outputs
+                outputs = self.model(**inputs)
+
+                # Extract the relevant hidden states
+                hidden_states = self.extract_hidden_states(outputs)
+
+                # Pool the hidden states
+                embeddings = self.pool_hidden_states(hidden_states)
+
+                # Convert to numpy and flatten
+                embeddings = embeddings.cpu().numpy().ravel()
+
+                return embeddings
+
+        except Exception as e:
+            self.util.error(f"Error extracting embeddings for file {file}: {str(e)}")
+            return np.zeros(self.get_embedding_dim())  # Return zero vector on error
+
+    def preprocess_input(self, signal, sampling_rate):
+        # This method should be implemented by subclasses
+        raise NotImplementedError("Subclasses must implement preprocess_input method")
+
+    def extract_hidden_states(self, outputs):
+        # This method should be implemented by subclasses
+        raise NotImplementedError(
+            "Subclasses must implement extract_hidden_states method"
+        )
+
+    def pool_hidden_states(self, hidden_states):
+        # Default implementation: mean pooling over time dimension
+        return torch.mean(hidden_states, dim=1)
+
+    def get_embedding_dim(self):
+        # This method should be implemented by subclasses
+        raise NotImplementedError("Subclasses must implement get_embedding_dim method")
+
+    def extract(self):
+        store = self.util.get_path("store")
+        storage = f"{store}{self.name}.pkl"
+        extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
+        no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
+        if extract or no_reuse or not os.path.isfile(storage):
+            if not self.model_initialized:
+                self.init_model()
+            self.util.debug(
+                f"extracting {self.feat_type} embeddings, this might take a while..."
+            )
+            emb_series = pd.Series(index=self.data_df.index, dtype=object)
+            for idx, (file, start, end) in enumerate(
+                tqdm(self.data_df.index.to_list())
+            ):
+                signal, sampling_rate = torchaudio.load(
+                    file,
+                    frame_offset=int(start.total_seconds() * 16000),
+                    num_frames=int((end - start).total_seconds() * 16000),
+                )
+                assert sampling_rate == 16000, f"got {sampling_rate} instead of 16000"
+                emb = self.get_embeddings(signal, sampling_rate, file)
+                emb_series[idx] = emb
+            self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
+            self.df.to_pickle(storage)
+            try:
+                glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
+            except KeyError:
+                pass
+        else:
+            self.util.debug(f"reusing extracted {self.feat_type} embeddings")
+            self.df = pd.read_pickle(storage)
+            if self.df.isnull().values.any():
+                self.util.error(
+                    f"got nan: {self.df.shape} {self.df.isnull().sum().sum()}"
+                )
+
+    def extract_sample(self, signal, sr):
+        self.init_model()
+        feats = self.get_embeddings(signal, sr, "no file")
+        return feats
+
+# for each feature extractor
+# feats_ast.py
+
+# class Ast(TransformerFeatureExtractor):
+# def preprocess_input(self, signal, sampling_rate):
+# inputs = self.processor(signal.numpy(), sampling_rate=sampling_rate, return_tensors="pt")
+# return {k: v.to(self.device) for k, v in inputs.items()}
+
+# def extract_hidden_states(self, outputs):
+# return outputs.last_hidden_state
+
+# def get_embedding_dim(self):
+# return self.model.config.hidden_size
+
+# # feats_wav2vec2.py
+
+# class Wav2vec2(TransformerFeatureExtractor):
+# def preprocess_input(self, signal, sampling_rate):
+# inputs = self.processor(signal, sampling_rate=sampling_rate, return_tensors="pt")
+# return {k: v.to(self.device) for k, v in inputs.items()}
+
+# def extract_hidden_states(self, outputs):
+# return outputs.last_hidden_state
+
+# def get_embedding_dim(self):
+# return self.model.config.hidden_size
+
+# # feats_wavlm.py
+
+# class Wavlm(TransformerFeatureExtractor):
+# def preprocess_input(self, signal, sampling_rate):
+# inputs = self.processor(signal, sampling_rate=sampling_rate, return_tensors="pt")
+# return {k: v.to(self.device) for k, v in inputs.items()}
+
+# def extract_hidden_states(self, outputs):
+# return outputs.last_hidden_state
+
+# def get_embedding_dim(self):
+# return self.model.config.hidden_size
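Note on the new base class: `TransformerFeatureExtractor` leaves `init_model`, `preprocess_input`, `extract_hidden_states`, and `get_embedding_dim` to subclasses and provides device selection, mean pooling, and cached batch extraction itself. As a hedged sketch only (not code shipped in the package), a concrete subclass along the lines of the commented-out examples at the end of the file could look as follows; the checkpoint name, the class name, and the `self.processor`/`self.model` attributes are assumptions.

# Hypothetical subclass sketch; mirrors the commented-out examples above,
# checkpoint and attribute names are assumptions, not part of nkululeko.
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model

from nkululeko.feat_extract.transformer_feature_extractor import (
    TransformerFeatureExtractor,
)


class Wav2vec2Embeddings(TransformerFeatureExtractor):
    def init_model(self):
        model_name = "facebook/wav2vec2-base"  # assumed checkpoint
        self.processor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
        self.model = Wav2Vec2Model.from_pretrained(model_name).to(self.device)
        self.model.eval()
        self.model_initialized = True

    def preprocess_input(self, signal, sampling_rate):
        # drop the torchaudio channel dimension; the processor pads and normalizes
        inputs = self.processor(
            signal.squeeze().numpy(),
            sampling_rate=sampling_rate,
            return_tensors="pt",
        )
        return {k: v.to(self.device) for k, v in inputs.items()}

    def extract_hidden_states(self, outputs):
        # (batch, time, hidden_size); the base class mean-pools over time
        return outputs.last_hidden_state

    def get_embedding_dim(self):
        return self.model.config.hidden_size

With such a subclass, the inherited `extract()` would load each 16 kHz segment, call `get_embeddings`, and cache the resulting DataFrame as `<store>/<name>.pkl`, exactly as shown in the diff above.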
nkululeko/file_checker.py CHANGED
@@ -1,7 +1,7 @@
-import pandas as pd
-from nkululeko.utils.util import Util
 import os
 
+from nkululeko.utils.util import Util
+
 
 class FileChecker:
     def __init__(self, df):
@@ -10,7 +10,7 @@ class FileChecker:
         self.util.copy_flags(df, self.df)
         check_vad = self.util.config_val("DATA", "check_vad", False)
         if check_vad:
-            self.util.debug(f"This may take a while downloading the VAD model")
+            self.util.debug("This may take a while downloading the VAD model")
             import torch
 
             torch.set_num_threads(1)
nkululeko/filter_data.py CHANGED
@@ -1,8 +1,10 @@
+import ast
+
 import audformat
 import pandas as pd
+
 import nkululeko.glob_conf as glob_conf
 from nkululeko.utils.util import Util
-import ast
 
 
 class DataFilter:
nkululeko/fixedsegment.py ADDED
@@ -0,0 +1,83 @@
+"""
+Segment audio files in a given directory into smaller chunks with a specified length and overlap. This requires the PyDub library to be installed.
+
+Args:
+    input_dir (str): The directory containing the audio files to be segmented.
+    output_dir (str): The directory where the segmented audio files will be saved.
+    segment_length (int): The length of each audio segment in milliseconds.
+    overlap (int): The overlap between adjacent audio segments in milliseconds.
+
+This function will recursively search the input directory for all .wav audio files, and then segment each file into smaller chunks with the specified length and overlap. The segmented audio files will be saved in the output directory, preserving the relative directory structure from the input directory.
+"""
+
+import argparse
+import glob
+from pathlib import Path
+
+from pydub import AudioSegment
+
+
+# list audio files given a directory
+def segment_audio(input_dir, output_dir, segment_length, overlap):
+    # check if input dir exist
+    if not Path(input_dir).exists():
+        print(f"Directory {input_dir} does not exist.")
+        return
+
+    # check if output dir exist, create if not
+    if not Path(output_dir).exists():
+        Path(output_dir).mkdir(parents=True)
+
+    audio_files = glob.glob(str(Path(input_dir) / "**" / "*.wav"), recursive=True)
+
+    for audio_file in audio_files:
+        # segment into 2 seconds with 1 second overlap (default values)
+        audio = AudioSegment.from_file(audio_file)
+
+        segments = []
+
+        for i in range(0, len(audio), segment_length - overlap):
+            segment = audio[i : i + segment_length]
+            segments.append(segment)
+
+        # Path(output_dir).mkdir(exist_ok=True)
+        for i, segment in enumerate(segments):
+            # get relative path from input_dir
+            relative_path = Path(audio_file).relative_to(input_dir)
+            # make output directory if not exist
+            output_subdir = Path(output_dir) / relative_path.parent
+            output_subdir.mkdir(parents=True, exist_ok=True)
+            segment.export(
+                str(output_subdir / f"{Path(audio_file).stem}_{i}.wav"),
+                format="wav",
+            )
+
+    print("DONE.")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input_dir", type=str, default="./")
+    # add argument for output_dir
+    parser.add_argument(
+        "--output_dir",
+        type=str,
+        default="./segmented_data/",
+    )
+    parser.add_argument(
+        "--segment_length",
+        type=int,
+        default=2000,
+    )
+    parser.add_argument(
+        "--overlap",
+        type=int,
+        default=1000,
+    )
+    args = parser.parse_args()
+    segment_audio(
+        args.input_dir,
+        args.output_dir,
+        segment_length=args.segment_length,
+        overlap=args.overlap,
+    )
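The new `nkululeko/fixedsegment.py` exposes both the argparse CLI shown above and a callable `segment_audio` function. A minimal usage sketch with placeholder paths, using the default 2000 ms window and 1000 ms overlap documented in the file:

# Usage sketch for the new fixedsegment helper; paths are placeholders and
# lengths are in milliseconds, matching the CLI defaults above.
from nkululeko.fixedsegment import segment_audio

segment_audio(
    input_dir="./data/train",        # directory searched recursively for .wav files
    output_dir="./segmented_data/",  # default output directory of the CLI
    segment_length=2000,             # 2-second windows
    overlap=1000,                    # 1-second overlap between consecutive windows
)

The same behaviour is available from the command line via the `--input_dir`, `--output_dir`, `--segment_length`, and `--overlap` flags.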
nkululeko/models/model.py CHANGED
@@ -3,13 +3,11 @@ import ast
 import pickle
 import random
 
-from joblib import parallel_backend
 import numpy as np
 import pandas as pd
-from sklearn.model_selection import GridSearchCV
-from sklearn.model_selection import LeaveOneGroupOut
-from sklearn.model_selection import StratifiedKFold
 import sklearn.utils
+from joblib import parallel_backend
+from sklearn.model_selection import GridSearchCV, LeaveOneGroupOut, StratifiedKFold
 
 import nkululeko.glob_conf as glob_conf
 from nkululeko.reporting.reporter import Reporter
@@ -119,7 +117,7 @@ class Model:
         # get unique list of speakers
         speakers = annos["speaker"].unique()
         # check for folds columns
-        if not "fold" in annos.columns:
+        if "fold" not in annos.columns:
             self.util.debug(f"creating random folds for {logo} groups")
             # create a random dictionary of groups
             sdict = {}
nkululeko/models/model_bayes.py CHANGED
@@ -1,6 +1,7 @@
 # model_bayes.py
 
 from sklearn.naive_bayes import GaussianNB
+
 from nkululeko.models.model import Model
 
 
nkululeko/models/model_cnn.py CHANGED
@@ -6,23 +6,21 @@ Inspired by code from Su Lei
 """
 
 import ast
-from collections import OrderedDict
 
 import numpy as np
 import pandas as pd
-from PIL import Image
-from sklearn.metrics import recall_score
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from torch.utils.data import Dataset
 import torchvision.transforms as transforms
+from PIL import Image
+from sklearn.metrics import recall_score
+from torch.utils.data import Dataset
 
 import nkululeko.glob_conf as glob_conf
 from nkululeko.losses.loss_softf1loss import SoftF1Loss
 from nkululeko.models.model import Model
 from nkululeko.reporting.reporter import Reporter
-from nkululeko.utils.util import Util
 
 
 class CNNModel(Model):
@@ -55,7 +53,7 @@ class CNNModel(Model):
             )
         else:
             self.util.error(f"unknown loss function: {criterion}")
-        self.util.debug(f"using model with cross entropy loss function")
+        self.util.debug("using model with cross entropy loss function")
         # set up the model
         # cuda = "cuda" if torch.cuda.is_available() else "cpu"
         self.device = self.util.config_val("MODEL", "device", "cpu")
nkululeko/models/model_gmm.py CHANGED
@@ -1,8 +1,10 @@
 # model_gmm.py
 
+import pandas as pd
 from sklearn import mixture
+
 from nkululeko.models.model import Model
-import pandas as pd
+
 
 class GMM_model(Model):
     """An GMM model"""
@@ -15,23 +17,25 @@ class GMM_model(Model):
         self.n_components = int(self.util.config_val("MODEL", "GMM_components", "4"))
         covariance_type = self.util.config_val("MODEL", "GMM_covariance_type", "full")
         self.clf = mixture.GaussianMixture(
-            n_components=self.n_components, 
+            n_components=self.n_components,
             covariance_type=covariance_type,
-            random_state = 42,
+            random_state=42,
         )
         # set up the classifier
 
     def get_predictions(self):
-        """Use the predict_proba method of the GaussianMixture model to get 
+        """Use the predict_proba method of the GaussianMixture model to get
-        probabilities. Create a DataFrame with these probabilities and return 
+        probabilities. Create a DataFrame with these probabilities and return
         it along with the predictions."""
         probs = self.clf.predict_proba(self.feats_test)
         preds = self.clf.predict(self.feats_test)
-
+
         # Convert predictions to a list
         preds = preds.tolist()
-
+
         # Create a DataFrame for probabilities
-        proba_df = pd.DataFrame(probs, index=self.feats_test.index, columns=range(self.n_components))
-
+        proba_df = pd.DataFrame(
+            probs, index=self.feats_test.index, columns=range(self.n_components)
+        )
+
         return preds, proba_df
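For context, the reworked `get_predictions` pairs the hard component assignments from `predict` with a per-component probability DataFrame from `predict_proba`. A self-contained sketch of that pattern on synthetic data (not taken from the package) looks like this:

# Standalone sketch of the pattern used above: probabilities are wrapped in a
# DataFrame with one column per GMM component; the data here is synthetic.
import numpy as np
import pandas as pd
from sklearn import mixture

rng = np.random.default_rng(42)
feats_test = pd.DataFrame(
    rng.normal(size=(20, 3)), index=[f"sample_{i}.wav" for i in range(20)]
)

clf = mixture.GaussianMixture(n_components=4, covariance_type="full", random_state=42)
clf.fit(feats_test)  # in nkululeko the model is fitted on the training features

probs = clf.predict_proba(feats_test)     # shape: (n_samples, n_components)
preds = clf.predict(feats_test).tolist()  # hard component assignments
proba_df = pd.DataFrame(probs, index=feats_test.index, columns=range(4))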
nkululeko/models/model_knn.py CHANGED
@@ -1,6 +1,7 @@
 # model_knn.py
 
 from sklearn.neighbors import KNeighborsClassifier
+
 from nkululeko.models.model import Model
 
 
nkululeko/models/model_knn_reg.py CHANGED
@@ -1,6 +1,7 @@
 # model_knn_reg.py
 
 from sklearn.neighbors import KNeighborsRegressor
+
 from nkululeko.models.model import Model
 
 
nkululeko/models/model_lin_reg.py CHANGED
@@ -1,6 +1,7 @@
 # model_knn_reg.py
 
 from sklearn.linear_model import LinearRegression
+
 from nkululeko.models.model import Model
 
 
nkululeko/models/model_mlp.py CHANGED
@@ -4,14 +4,13 @@ from collections import OrderedDict
 
 import numpy as np
 import pandas as pd
-from sklearn.metrics import recall_score
 import torch
+from sklearn.metrics import recall_score
 
 import nkululeko.glob_conf as glob_conf
 from nkululeko.losses.loss_softf1loss import SoftF1Loss
 from nkululeko.models.model import Model
 from nkululeko.reporting.reporter import Reporter
-from nkululeko.utils.util import Util
 
 
 class MLPModel(Model):
@@ -44,7 +43,7 @@ class MLPModel(Model):
             )
         else:
             self.util.error(f"unknown loss function: {criterion}")
-        self.util.debug(f"using model with cross entropy loss function")
+        self.util.debug("using model with cross entropy loss function")
         # set up the model, use GPU if availabe
         cuda = "cuda" if torch.cuda.is_available() else "cpu"
         self.device = self.util.config_val("MODEL", "device", cuda)
nkululeko/models/model_mlp_regression.py CHANGED
@@ -1,15 +1,10 @@
 # model_mlp.py
 import ast
 from collections import OrderedDict
-import os
 
 import numpy as np
 import torch
-
-from audmetric import concordance_cc
-from audmetric import mean_absolute_error
-from audmetric import mean_squared_error
-from traitlets import default
+from audmetric import concordance_cc, mean_absolute_error, mean_squared_error
 
 import nkululeko.glob_conf as glob_conf
 from nkululeko.losses.loss_ccc import ConcordanceCorCoeff
nkululeko/models/model_svm.py CHANGED
@@ -1,7 +1,7 @@
 # model_svm.py
 
-import random
 from sklearn import svm
+
 from nkululeko.models.model import Model
 
 
@@ -25,7 +25,7 @@ class SVM_model(Model):
             gamma="scale",
             probability=True,
             class_weight=class_weight,
-            random_state=42, # for consistent result
+            random_state=42,  # for consistent result
         )  # set up the classifier
 
     def set_c(self, c):
nkululeko/models/model_svr.py CHANGED
@@ -1,6 +1,7 @@
 # svrmodel.py
 
 from sklearn import svm
+
 from nkululeko.models.model import Model
 
 
nkululeko/models/model_tree.py CHANGED
@@ -1,6 +1,7 @@
 # model_tree.py
 
 from sklearn.tree import DecisionTreeClassifier
+
 from nkululeko.models.model import Model
 
 
@@ -12,6 +13,4 @@ class Tree_model(Model):
     def __init__(self, df_train, df_test, feats_train, feats_test):
         super().__init__(df_train, df_test, feats_train, feats_test)
         self.name = "tree"
-        self.clf = DecisionTreeClassifier(
-            random_state=42
-        )  # set up the classifier
+        self.clf = DecisionTreeClassifier(random_state=42)  # set up the classifier
nkululeko/models/model_tree_reg.py CHANGED
@@ -1,6 +1,7 @@
 # model_tree_reg.py
 
 from sklearn.tree import DecisionTreeRegressor
+
 from nkululeko.models.model import Model
 
 