likelihood 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
  import logging
  import os
  import random
+ import warnings
  from functools import partial
  from shutil import rmtree

@@ -14,8 +15,8 @@ from pandas.plotting import radviz
  os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
  logging.getLogger("tensorflow").setLevel(logging.ERROR)

- import warnings
- from functools import wraps
+
+ from typing import List

  import keras_tuner
  import tensorflow as tf
@@ -24,21 +25,11 @@ from sklearn.manifold import TSNE
  from tensorflow.keras.layers import InputLayer
  from tensorflow.keras.regularizers import l2

- from likelihood.tools import OneHotEncoder
+ from likelihood.tools import LoRALayer, OneHotEncoder, suppress_warnings

  tf.get_logger().setLevel("ERROR")


- def suppress_warnings(func):
-     @wraps(func)
-     def wrapper(*args, **kwargs):
-         with warnings.catch_warnings():
-             warnings.simplefilter("ignore")
-             return func(*args, **kwargs)
-
-     return wrapper
-
-
  class EarlyStopping:
      def __init__(self, patience=10, min_delta=0.001):
          self.patience = patience
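Note that `suppress_warnings` is no longer defined inline in this module; the import hunk above now pulls it from `likelihood.tools`, together with the new `LoRALayer`. The removed lines show its full body, so callers can keep using it unchanged; a minimal usage sketch (the `noisy_predict` function is hypothetical):

    import warnings
    from likelihood.tools import suppress_warnings

    @suppress_warnings  # wraps the call in catch_warnings() + simplefilter("ignore")
    def noisy_predict():
        warnings.warn("this warning is silenced by the decorator")
        return 42

    print(noisy_predict())  # 42, with no warning emitted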
@@ -246,7 +237,7 @@ class AutoClassifier(tf.keras.Model):
          Additional keyword arguments to pass to the model.

      classifier_activation : `str`
-         The activation function to use for the classifier layer. Default is "softmax". If the activation function is not a classification function, the model can be used in regression problems.
+         The activation function to use for the classifier layer. Default is `softmax`. If the activation function is not a classification function, the model can be used in regression problems.
      num_layers : `int`
          The number of hidden layers in the classifier. Default is 1.
      dropout : `float`
@@ -257,6 +248,10 @@ class AutoClassifier(tf.keras.Model):
          Whether to use variational autoencoder mode. Default is False.
      vae_units : `int`
          The number of units in the variational autoencoder. Default is 2.
+     lora_mode : `bool`
+         Whether to use LoRA layers. Default is False.
+     lora_rank : `int`
+         The rank of the LoRA layer. Default is 4.
      """

      def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
@@ -275,6 +270,8 @@ class AutoClassifier(tf.keras.Model):
          self.l2_reg = kwargs.get("l2_reg", 0.0)
          self.vae_mode = kwargs.get("vae_mode", False)
          self.vae_units = kwargs.get("vae_units", 2)
+         self.lora_mode = kwargs.get("lora_mode", False)
+         self.lora_rank = kwargs.get("lora_rank", 4)

      def build_encoder_decoder(self, input_shape):
          self.encoder = (
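The two new kwargs make LoRA opt-in at construction time, mirroring the existing `vae_mode`/`vae_units` pattern. A sketch of how a caller might enable it (argument values are illustrative and the import path is assumed):

    from likelihood.models import AutoClassifier  # import path assumed

    model = AutoClassifier(
        input_shape_parm=20,   # number of input features
        num_classes=3,
        units=16,
        activation="relu",
        num_layers=3,          # hidden layers in the classifier head
        lora_mode=True,        # build the head from LoRALayer blocks
        lora_rank=4,           # rank of each low-rank factorization
    )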
@@ -367,9 +364,8 @@ class AutoClassifier(tf.keras.Model):
          else:
              self.build_encoder_decoder(input_shape)

-         # Classifier with L2 regularization
          self.classifier = tf.keras.Sequential()
-         if self.num_layers > 1:
+         if self.num_layers > 1 and not self.lora_mode:
              for _ in range(self.num_layers - 1):
                  self.classifier.add(
                      tf.keras.layers.Dense(
@@ -380,13 +376,36 @@ class AutoClassifier(tf.keras.Model):
                  )
                  if self.dropout:
                      self.classifier.add(tf.keras.layers.Dropout(self.dropout))
-         self.classifier.add(
-             tf.keras.layers.Dense(
-                 units=self.num_classes,
-                 activation=self.classifier_activation,
-                 kernel_regularizer=l2(self.l2_reg),
+             self.classifier.add(
+                 tf.keras.layers.Dense(
+                     units=self.num_classes,
+                     activation=self.classifier_activation,
+                     kernel_regularizer=l2(self.l2_reg),
+                 )
+             )
+         elif self.lora_mode:
+             for _ in range(self.num_layers - 1):
+                 self.classifier.add(
+                     LoRALayer(units=self.units, rank=self.lora_rank, name=f"LoRA_{_}")
+                 )
+                 self.classifier.add(tf.keras.layers.Activation(self.activation))
+                 if self.dropout:
+                     self.classifier.add(tf.keras.layers.Dropout(self.dropout))
+             self.classifier.add(
+                 tf.keras.layers.Dense(
+                     units=self.num_classes,
+                     activation=self.classifier_activation,
+                     kernel_regularizer=l2(self.l2_reg),
+                 )
+             )
+         else:
+             self.classifier.add(
+                 tf.keras.layers.Dense(
+                     units=self.num_classes,
+                     activation=self.classifier_activation,
+                     kernel_regularizer=l2(self.l2_reg),
+                 )
              )
-         )

      def train_encoder_decoder(
          self, data, epochs, batch_size, validation_split=0.2, patience=10, **kwargs
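In LoRA mode the hidden Dense layers are replaced by `LoRALayer` blocks followed by explicit `Activation` layers, which implies the layer itself is linear. Its implementation lives in `likelihood.tools` and is not part of this diff; for orientation, a minimal sketch of a LoRA-style Keras layer matching the constructor signature used above (`units`, `rank`, `name`); the real layer may differ:

    import tensorflow as tf

    class LoRALayer(tf.keras.layers.Layer):
        """Low-rank linear layer: y = x @ A @ B, with A of shape (d, rank) and B of (rank, units)."""

        def __init__(self, units: int, rank: int = 4, **kwargs):
            super().__init__(**kwargs)
            self.units = units
            self.rank = rank

        def build(self, input_shape):
            d = int(input_shape[-1])
            # Two small factors stand in for a full (d, units) kernel.
            self.A = self.add_weight(shape=(d, self.rank), initializer="random_normal", name="A")
            self.B = self.add_weight(shape=(self.rank, self.units), initializer="zeros", name="B")

        def call(self, inputs):
            return tf.matmul(tf.matmul(inputs, self.A), self.B)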
@@ -498,7 +517,6 @@ class AutoClassifier(tf.keras.Model):
          if not isinstance(source_model, AutoClassifier):
              raise ValueError("Source model must be an instance of AutoClassifier.")

-         # Check compatibility in input shape and units
          if self.input_shape_parm != source_model.input_shape_parm:
              raise ValueError(
                  f"Incompatible input shape. Expected {self.input_shape_parm}, got {source_model.input_shape_parm}."
@@ -508,9 +526,8 @@ class AutoClassifier(tf.keras.Model):
                  f"Incompatible number of units. Expected {self.units}, got {source_model.units}."
              )
          self.encoder, self.decoder = tf.keras.Sequential(), tf.keras.Sequential()
-         # Copy the encoder layers
          for i, layer in enumerate(source_model.encoder.layers):
-             if isinstance(layer, tf.keras.layers.Dense):  # Make sure it's a Dense layer
+             if isinstance(layer, tf.keras.layers.Dense):
                  dummy_input = tf.convert_to_tensor(tf.random.normal([1, layer.input_shape[1]]))
                  dense_layer = tf.keras.layers.Dense(
                      units=layer.units,
@@ -519,14 +536,12 @@ class AutoClassifier(tf.keras.Model):
                  )
                  dense_layer.build(dummy_input.shape)
                  self.encoder.add(dense_layer)
-                 # Set the weights correctly
                  self.encoder.layers[i].set_weights(layer.get_weights())
              elif not isinstance(layer, InputLayer):
                  raise ValueError(f"Layer type {type(layer)} not supported for copying.")

-         # Copy the decoder layers
          for i, layer in enumerate(source_model.decoder.layers):
-             if isinstance(layer, tf.keras.layers.Dense):  # Ensure it's a Dense layer
+             if isinstance(layer, tf.keras.layers.Dense):
                  dummy_input = tf.convert_to_tensor(tf.random.normal([1, layer.input_shape[1]]))
                  dense_layer = tf.keras.layers.Dense(
                      units=layer.units,
@@ -535,7 +550,6 @@ class AutoClassifier(tf.keras.Model):
                  )
                  dense_layer.build(dummy_input.shape)
                  self.decoder.add(dense_layer)
-                 # Set the weights correctly
                  self.decoder.layers[i].set_weights(layer.get_weights())
              elif not isinstance(layer, InputLayer):
                  raise ValueError(f"Layer type {type(layer)} not supported for copying.")
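Both copy loops follow the same pattern: rebuild each Dense layer, `build()` it on a dummy input of the right shape, add it to the target model, then transfer the trained parameters with `set_weights`. The pattern also works standalone; a minimal sketch:

    import tensorflow as tf

    src = tf.keras.Sequential([tf.keras.layers.Dense(8, activation="relu")])
    src.build((None, 4))  # materialize kernel and bias

    dst = tf.keras.layers.Dense(units=8, activation="relu")
    dst.build((1, 4))  # shape must match the source layer's input
    dst.set_weights(src.layers[0].get_weights())  # copies kernel + bias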
@@ -552,6 +566,8 @@ class AutoClassifier(tf.keras.Model):
              "l2_reg": self.l2_reg,
              "vae_mode": self.vae_mode,
              "vae_units": self.vae_units,
+             "lora_mode": self.lora_mode,
+             "lora_rank": self.lora_rank,
          }
          base_config = super(AutoClassifier, self).get_config()
          return dict(list(base_config.items()) + list(config.items()))
@@ -569,6 +585,8 @@ class AutoClassifier(tf.keras.Model):
              l2_reg=config["l2_reg"],
              vae_mode=config["vae_mode"],
              vae_units=config["vae_units"],
+             lora_mode=config["lora_mode"],
+             lora_rank=config["lora_rank"],
          )
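With the two LoRA keys added to `get_config` and consumed by `from_config`, serialization round-trips the new settings in the standard Keras way. A sketch (assumes a built `model` instance):

    config = model.get_config()                 # now includes lora_mode and lora_rank
    clone = AutoClassifier.from_config(config)  # rebuilds a model with the same settings
    assert clone.lora_mode == model.lora_mode and clone.lora_rank == model.lora_rank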
 
 
@@ -874,62 +892,220 @@ def setup_model(


  class GetInsights:
+     """
+     A class to analyze the output of a neural network model, including visualizations
+     of the weights, t-SNE representation, and feature statistics.
+
+     Parameters
+     ----------
+     model : `AutoClassifier`
+         The trained model to analyze.
+     inputs : `np.ndarray`
+         The input data for analysis.
+     """
+
      def __init__(self, model: AutoClassifier, inputs: np.ndarray) -> None:
+         """
+         Initializes the GetInsights class.
+
+         Parameters
+         ----------
+         model : `AutoClassifier`
+             The trained model to analyze.
+         inputs : `np.ndarray`
+             The input data for analysis.
+         """
          self.inputs = inputs
          self.model = model
-         if isinstance(self.model.encoder.layers[0], InputLayer):
-             self.encoder_layer = self.model.encoder.layers[1]
-         else:
-             self.encoder_layer = self.model.encoder.layers[0]
+
+         self.encoder_layer = (
+             self.model.encoder.layers[1]
+             if isinstance(self.model.encoder.layers[0], InputLayer)
+             else self.model.encoder.layers[0]
+         )
          self.decoder_layer = self.model.decoder.layers[0]
+
          self.encoder_weights = self.encoder_layer.get_weights()[0]
          self.decoder_weights = self.decoder_layer.get_weights()[0]
-         colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)

+         self.sorted_names = self._generate_sorted_color_names()
+
+     def _generate_sorted_color_names(self) -> list:
+         """
+         Generate sorted color names based on their HSV values.
+
+         Parameters
+         ----------
+         `None`
+
+         Returns
+         -------
+         `list` : Sorted color names.
+         """
+         colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)
          by_hsv = sorted(
              (tuple(mcolors.rgb_to_hsv(mcolors.to_rgba(color)[:3])), name)
              for name, color in colors.items()
          )
-         self.sorted_names = [name for hsv, name in by_hsv if hsv[1] > 0.4 and hsv[2] >= 0.4]
-         random.shuffle(self.sorted_names)
+         sorted_names = [name for hsv, name in by_hsv if hsv[1] > 0.4 and hsv[2] >= 0.4]
+         random.shuffle(sorted_names)
+         return sorted_names

      def predictor_analyzer(
          self,
-         frac=None,
+         frac: float = None,
          cmap: str = "viridis",
          aspect: str = "auto",
          highlight: bool = True,
          **kwargs,
      ) -> None:
+         """
+         Analyze the model's predictions and visualize data.
+
+         Parameters
+         ----------
+         frac : `float`, optional
+             Fraction of data to use for analysis (default is `None`).
+         cmap : `str`, optional
+             The colormap for visualization (default is `"viridis"`).
+         aspect : `str`, optional
+             Aspect ratio for the visualization (default is `"auto"`).
+         highlight : `bool`, optional
+             Whether to highlight the maximum weights (default is `True`).
+         **kwargs : `dict`, optional
+             Additional keyword arguments for customization.
+
+         Returns
+         -------
+         `DataFrame` : The statistical summary of the input data.
+         """
          self._viz_weights(cmap=cmap, aspect=aspect, highlight=highlight, **kwargs)
          inputs = self.inputs.copy()
+         inputs = self._prepare_inputs(inputs, frac)
          y_labels = kwargs.get("y_labels", None)
+         encoded, reconstructed = self._encode_decode(inputs)
+         self._visualize_data(inputs, reconstructed, cmap, aspect)
+         self._prepare_data_for_analysis(inputs, reconstructed, encoded, y_labels)
+
+         try:
+             self._get_tsne_repr(inputs, frac)
+             self._viz_tsne_repr(c=self.classification)
+
+             self._viz_radviz(self.data, "class", "Radviz Visualization of Latent Space")
+             self._viz_radviz(self.data_input, "class", "Radviz Visualization of Input Data")
+         except ValueError:
+             warnings.warn(
+                 "Some functions or processes will not be executed for regression problems.",
+                 UserWarning,
+             )
+
+         return self._statistics(self.data_input)
+
+     def _prepare_inputs(self, inputs: np.ndarray, frac: float) -> np.ndarray:
+         """
+         Prepare the input data, possibly selecting a fraction of it.
+
+         Parameters
+         ----------
+         inputs : `np.ndarray`
+             The input data.
+         frac : `float`
+             Fraction of data to use.
+
+         Returns
+         -------
+         `np.ndarray` : The prepared input data.
+         """
          if frac:
              n = int(frac * self.inputs.shape[0])
              indexes = np.random.choice(np.arange(inputs.shape[0]), n, replace=False)
              inputs = inputs[indexes]
          inputs[np.isnan(inputs)] = 0.0
-         # check if self.model.encoder(inputs) has two outputs
+         return inputs
+
+     def _encode_decode(self, inputs: np.ndarray) -> tuple:
+         """
+         Perform encoding and decoding on the input data.
+
+         Parameters
+         ----------
+         inputs : `np.ndarray`
+             The input data.
+
+         Returns
+         -------
+         `tuple` : The encoded and reconstructed data.
+         """
          try:
              mean, log_var = self.model.encoder(inputs)
              encoded = sampling(mean, log_var)
          except:
              encoded = self.model.encoder(inputs)
          reconstructed = self.model.decoder(encoded)
-         combined = tf.concat([reconstructed, encoded], axis=1)
-         self.classification = self.model.classifier(combined).numpy().argmax(axis=1)
+         return encoded, reconstructed
+
+     def _visualize_data(
+         self, inputs: np.ndarray, reconstructed: np.ndarray, cmap: str, aspect: str
+     ) -> None:
+         """
+         Visualize the original data and the reconstructed data.
+
+         Parameters
+         ----------
+         inputs : `np.ndarray`
+             The input data.
+         reconstructed : `np.ndarray`
+             The reconstructed data.
+         cmap : `str`
+             The colormap for visualization.
+         aspect : `str`
+             Aspect ratio for the visualization.
+
+         Returns
+         -------
+         `None`
+         """
          ax = plt.subplot(1, 2, 1)
-         plt.imshow(self.inputs, cmap=cmap, aspect=aspect)
+         plt.imshow(inputs, cmap=cmap, aspect=aspect)
          plt.colorbar()
          plt.title("Original Data")
+
          plt.subplot(1, 2, 2, sharex=ax, sharey=ax)
          plt.imshow(reconstructed, cmap=cmap, aspect=aspect)
          plt.colorbar()
          plt.title("Decoder Layer Reconstruction")
          plt.show()

-         self._get_tsne_repr(inputs=inputs, frac=frac)
-         self._viz_tsne_repr(c=self.classification)
+     def _prepare_data_for_analysis(
+         self,
+         inputs: np.ndarray,
+         reconstructed: np.ndarray,
+         encoded: np.ndarray,
+         y_labels: List[str],
+     ) -> None:
+         """
+         Prepare data for statistical analysis.
+
+         Parameters
+         ----------
+         inputs : `np.ndarray`
+             The input data.
+         reconstructed : `np.ndarray`
+             The reconstructed data.
+         encoded : `np.ndarray`
+             The encoded data.
+         y_labels : `List[str]`
+             The labels of features.
+
+         Returns
+         -------
+         `None`
+         """
+         self.classification = (
+             self.model.classifier(tf.concat([reconstructed, encoded], axis=1))
+             .numpy()
+             .argmax(axis=1)
+         )

          self.data = pd.DataFrame(encoded, columns=[f"Feature {i}" for i in range(encoded.shape[1])])
          self.data_input = pd.DataFrame(
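`_encode_decode` keeps the old behavior: try the two-output VAE path first, then fall back to a plain encoder if unpacking fails. The `sampling` helper it calls is defined elsewhere in this module and is not shown in the diff; a typical reparameterization-trick sketch of such a helper (the actual implementation may differ):

    import tensorflow as tf

    def sampling(mean, log_var):
        # z = mean + sigma * epsilon, with epsilon ~ N(0, I)
        epsilon = tf.random.normal(shape=tf.shape(mean))
        return mean + tf.exp(0.5 * log_var) * epsilon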
@@ -938,84 +1114,25 @@ class GetInsights:
              [f"Feature {i}" for i in range(inputs.shape[1])] if y_labels is None else y_labels
          ),
      )
+
      self.data["class"] = self.classification
      self.data_input["class"] = self.classification

-         self.data_normalized = self.data.copy(deep=True)
-         self.data_normalized.iloc[:, :-1] = (
-             2.0
-             * (self.data_normalized.iloc[:, :-1] - self.data_normalized.iloc[:, :-1].min())
-             / (self.data_normalized.iloc[:, :-1].max() - self.data_normalized.iloc[:, :-1].min())
-             - 1
-         )
-         radviz(self.data_normalized, "class", color=self.colors)
-         plt.title("Radviz Visualization of Latent Space")
-         plt.show()
-         self.data_input_normalized = self.data_input.copy(deep=True)
-         self.data_input_normalized.iloc[:, :-1] = (
-             2.0
-             * (
-                 self.data_input_normalized.iloc[:, :-1]
-                 - self.data_input_normalized.iloc[:, :-1].min()
-             )
-             / (
-                 self.data_input_normalized.iloc[:, :-1].max()
-                 - self.data_input_normalized.iloc[:, :-1].min()
-             )
-             - 1
-         )
-         radviz(self.data_input_normalized, "class", color=self.colors)
-         plt.title("Radviz Visualization of Input Data")
-         plt.show()
-         return self._statistics(self.data_input)
-
-     def _statistics(self, data_input: DataFrame, **kwargs) -> DataFrame:
-         data = data_input.copy(deep=True)
-
-         if not pd.api.types.is_string_dtype(data["class"]):
-             data["class"] = data["class"].astype(str)
-
-         data.ffill(inplace=True)
-         grouped_data = data.groupby("class")
-
-         numerical_stats = grouped_data.agg(["mean", "min", "max", "std", "median"])
-         numerical_stats.columns = ["_".join(col).strip() for col in numerical_stats.columns.values]
-
-         def get_mode(x):
-             mode_series = x.mode()
-             return mode_series.iloc[0] if not mode_series.empty else None
-
-         mode_stats = grouped_data.apply(get_mode, include_groups=False)
-         mode_stats.columns = [f"{col}_mode" for col in mode_stats.columns]
-         combined_stats = pd.concat([numerical_stats, mode_stats], axis=1)
-
-         return combined_stats.T
-
-     def _viz_weights(
-         self, cmap: str = "viridis", aspect: str = "auto", highlight: bool = True, **kwargs
-     ) -> None:
-         title = kwargs.get("title", "Encoder Layer Weights (Dense Layer)")
-         y_labels = kwargs.get("y_labels", None)
-         cmap_highlight = kwargs.get("cmap_highlight", "Pastel1")
-         highlight_mask = np.zeros_like(self.encoder_weights, dtype=bool)
+     def _get_tsne_repr(self, inputs: np.ndarray = None, frac: float = None) -> None:
+         """
+         Perform t-SNE dimensionality reduction on the input data.

-         plt.imshow(self.encoder_weights, cmap=cmap, aspect=aspect)
-         plt.colorbar()
-         plt.title(title)
-         if y_labels is not None:
-             plt.yticks(ticks=np.arange(self.encoder_weights.shape[0]), labels=y_labels)
-         if highlight:
-             for i, j in enumerate(self.encoder_weights.argmax(axis=1)):
-                 highlight_mask[i, j] = True
-             plt.imshow(
-                 np.ma.masked_where(~highlight_mask, self.encoder_weights),
-                 cmap=cmap_highlight,
-                 alpha=0.5,
-                 aspect=aspect,
-             )
-         plt.show()
+         Parameters
+         ----------
+         inputs : `np.ndarray`
+             The input data.
+         frac : `float`
+             Fraction of data to use.

-     def _get_tsne_repr(self, inputs=None, frac=None) -> None:
+         Returns
+         -------
+         `None`
+         """
          if inputs is None:
              inputs = self.inputs.copy()
          if frac:
@@ -1029,26 +1146,145 @@ class GetInsights:
          self.reduced_data_tsne = tsne.fit_transform(self.latent_representations)

      def _viz_tsne_repr(self, **kwargs) -> None:
+         """
+         Visualize the t-SNE representation of the latent space.
+
+         Parameters
+         ----------
+         **kwargs : `dict`
+             Additional keyword arguments for customization.
+
+         Returns
+         -------
+         `None`
+         """
          c = kwargs.get("c", None)
          self.colors = (
              kwargs.get("colors", self.sorted_names[: len(np.unique(c))]) if c is not None else None
          )
+
          plt.scatter(
              self.reduced_data_tsne[:, 0],
              self.reduced_data_tsne[:, 1],
              cmap=matplotlib.colors.ListedColormap(self.colors) if c is not None else None,
              c=c,
          )
+
          if c is not None:
              cb = plt.colorbar()
              loc = np.arange(0, max(c), max(c) / float(len(self.colors)))
              cb.set_ticks(loc)
              cb.set_ticklabels(np.unique(c))
+
          plt.title("t-SNE Visualization of Latent Space")
          plt.xlabel("t-SNE 1")
          plt.ylabel("t-SNE 2")
          plt.show()

+     def _viz_radviz(self, data: pd.DataFrame, color_column: str, title: str) -> None:
+         """
+         Visualize the data using RadViz.
+
+         Parameters
+         ----------
+         data : `pd.DataFrame`
+             The data to visualize.
+         color_column : `str`
+             The column to use for coloring.
+         title : `str`
+             The title of the plot.
+
+         Returns
+         -------
+         `None`
+         """
+         data_normalized = data.copy(deep=True)
+         data_normalized.iloc[:, :-1] = (
+             2.0
+             * (data_normalized.iloc[:, :-1] - data_normalized.iloc[:, :-1].min())
+             / (data_normalized.iloc[:, :-1].max() - data_normalized.iloc[:, :-1].min())
+             - 1
+         )
+         radviz(data_normalized, color_column, color=self.colors)
+         plt.title(title)
+         plt.show()
+
+     def _viz_weights(
+         self, cmap: str = "viridis", aspect: str = "auto", highlight: bool = True, **kwargs
+     ) -> None:
+         """
+         Visualize the encoder layer weights of the model.
+
+         Parameters
+         ----------
+         cmap : `str`, optional
+             The colormap for visualization (default is `"viridis"`).
+         aspect : `str`, optional
+             Aspect ratio for the visualization (default is `"auto"`).
+         highlight : `bool`, optional
+             Whether to highlight the maximum weights (default is `True`).
+         **kwargs : `dict`, optional
+             Additional keyword arguments for customization.
+
+         Returns
+         -------
+         `None`
+         """
+         title = kwargs.get("title", "Encoder Layer Weights (Dense Layer)")
+         y_labels = kwargs.get("y_labels", None)
+         cmap_highlight = kwargs.get("cmap_highlight", "Pastel1")
+         highlight_mask = np.zeros_like(self.encoder_weights, dtype=bool)
+
+         plt.imshow(self.encoder_weights, cmap=cmap, aspect=aspect)
+         plt.colorbar()
+         plt.title(title)
+         if y_labels is not None:
+             plt.yticks(ticks=np.arange(self.encoder_weights.shape[0]), labels=y_labels)
+         if highlight:
+             for i, j in enumerate(self.encoder_weights.argmax(axis=1)):
+                 highlight_mask[i, j] = True
+             plt.imshow(
+                 np.ma.masked_where(~highlight_mask, self.encoder_weights),
+                 cmap=cmap_highlight,
+                 alpha=0.5,
+                 aspect=aspect,
+             )
+         plt.show()
+
+     def _statistics(self, data_input: DataFrame) -> DataFrame:
+         """
+         Compute statistical summaries of the input data.
+
+         Parameters
+         ----------
+         data_input : `DataFrame`
+             The data to compute statistics for.
+
+         Returns
+         -------
+         `DataFrame` : The statistical summary of the input data.
+         """
+         data = data_input.copy(deep=True)
+
+         if not pd.api.types.is_string_dtype(data["class"]):
+             data["class"] = data["class"].astype(str)
+
+         data.ffill(inplace=True)
+         grouped_data = data.groupby("class")
+
+         numerical_stats = grouped_data.agg(["mean", "min", "max", "std", "median"])
+         numerical_stats.columns = ["_".join(col).strip() for col in numerical_stats.columns.values]
+
+         def get_mode(x):
+             mode_series = x.mode()
+             return mode_series.iloc[0] if not mode_series.empty else None
+
+         mode_stats = grouped_data.apply(get_mode, include_groups=False)
+         mode_stats.columns = [f"{col}_mode" for col in mode_stats.columns]
+         combined_stats = pd.concat([numerical_stats, mode_stats], axis=1)
+
+         return combined_stats.T
+

  ########################################################################################
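Taken together, the GetInsights changes are a mechanical split of `predictor_analyzer` into focused helpers (`_prepare_inputs`, `_encode_decode`, `_visualize_data`, `_prepare_data_for_analysis`, `_viz_radviz`) plus numpydoc docstrings; behavior is preserved, except that the t-SNE and RadViz steps are now skipped with a `UserWarning` when they raise `ValueError`, e.g. for regression-style outputs. A usage sketch of the public entry point (assumes a trained `model` and a 2-D feature array `X`):

    insights = GetInsights(model, X)
    stats = insights.predictor_analyzer(frac=0.5)  # shows the plots, returns per-class stats
    print(stats)  # transposed DataFrame: mean/min/max/std/median and mode per class

The final hunk below touches the tools package `__init__`, re-exporting the new `models_tools` module, presumably the home of the `LoRALayer` and `suppress_warnings` now imported at the top of this file.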
 
@@ -1,2 +1,3 @@
+ from .models_tools import *
  from .numeric_tools import *
  from .tools import *