likelihood-1.5.5-py3-none-any.whl → likelihood-1.5.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
likelihood/models/deep/__init__.py CHANGED
@@ -1,2 +1,3 @@
  from .autoencoders import *
  from .gan import *
+ from .predictor import GetInsights
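With this re-export, the new class is importable straight from the subpackage. A minimal sketch of the new import path (assumes likelihood 1.5.7 is installed, plus a trained AutoClassifier `model` and a float32 feature matrix `X`, as in the package's own example further down this diff):

    # New in 1.5.7: GetInsights is re-exported by likelihood.models.deep
    from likelihood.models.deep import GetInsights

    insights = GetInsights(model, X)
    summary = insights.predictor_analyzer(frac=1.0)  # per-class stats DataFrame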
likelihood/models/deep/autoencoders.py CHANGED
@@ -1,27 +1,17 @@
  import logging
  import os
- import random
- import warnings
  from functools import partial
  from shutil import rmtree

- import matplotlib
- import matplotlib.colors as mcolors
- import matplotlib.pyplot as plt
  import numpy as np
  import pandas as pd
- from pandas.plotting import radviz

  os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
  logging.getLogger("tensorflow").setLevel(logging.ERROR)

-
- from typing import List
-
  import keras_tuner
  import tensorflow as tf
  from pandas.core.frame import DataFrame
- from sklearn.manifold import TSNE
  from tensorflow.keras.layers import InputLayer
  from tensorflow.keras.regularizers import l2

@@ -889,448 +879,3 @@ def setup_model(

      best_hps = tuner.get_best_hyperparameters(1)[0].values
      return best_model, pd.DataFrame(best_hps, index=["Value"])
-
-
- class GetInsights:
-     """
-     A class to analyze the output of a neural network model, including visualizations
-     of the weights, t-SNE representation, and feature statistics.
-
-     Parameters
-     ----------
-     model : `AutoClassifier`
-         The trained model to analyze.
-     inputs : `np.ndarray`
-         The input data for analysis.
-     """
-
-     def __init__(self, model: AutoClassifier, inputs: np.ndarray) -> None:
-         """
-         Initializes the GetInsights class.
-
-         Parameters
-         ----------
-         model : `AutoClassifier`
-             The trained model to analyze.
-         inputs : `np.ndarray`
-             The input data for analysis.
-         """
-         self.inputs = inputs
-         self.model = model
-
-         self.encoder_layer = (
-             self.model.encoder.layers[1]
-             if isinstance(self.model.encoder.layers[0], InputLayer)
-             else self.model.encoder.layers[0]
-         )
-         self.decoder_layer = self.model.decoder.layers[0]
-
-         self.encoder_weights = self.encoder_layer.get_weights()[0]
-         self.decoder_weights = self.decoder_layer.get_weights()[0]
-
-         self.sorted_names = self._generate_sorted_color_names()
-
-     def _generate_sorted_color_names(self) -> list:
-         """
-         Generate sorted color names based on their HSV values.
-
-         Parameters
-         ----------
-         `None`
-
-         Returns
-         -------
-         `list` : Sorted color names.
-         """
-         colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)
-         by_hsv = sorted(
-             (tuple(mcolors.rgb_to_hsv(mcolors.to_rgba(color)[:3])), name)
-             for name, color in colors.items()
-         )
-         sorted_names = [name for hsv, name in by_hsv if hsv[1] > 0.4 and hsv[2] >= 0.4]
-         random.shuffle(sorted_names)
-         return sorted_names
-
-     def predictor_analyzer(
-         self,
-         frac: float = None,
-         cmap: str = "viridis",
-         aspect: str = "auto",
-         highlight: bool = True,
-         **kwargs,
-     ) -> None:
-         """
-         Analyze the model's predictions and visualize data.
-
-         Parameters
-         ----------
-         frac : `float`, optional
-             Fraction of data to use for analysis (default is `None`).
-         cmap : `str`, optional
-             The colormap for visualization (default is `"viridis"`).
-         aspect : `str`, optional
-             Aspect ratio for the visualization (default is `"auto"`).
-         highlight : `bool`, optional
-             Whether to highlight the maximum weights (default is `True`).
-         **kwargs : `dict`, optional
-             Additional keyword arguments for customization.
-
-         Returns
-         -------
-         `DataFrame` : The statistical summary of the input data.
-         """
-         self._viz_weights(cmap=cmap, aspect=aspect, highlight=highlight, **kwargs)
-         inputs = self.inputs.copy()
-         inputs = self._prepare_inputs(inputs, frac)
-         y_labels = kwargs.get("y_labels", None)
-         encoded, reconstructed = self._encode_decode(inputs)
-         self._visualize_data(inputs, reconstructed, cmap, aspect)
-         self._prepare_data_for_analysis(inputs, reconstructed, encoded, y_labels)
-
-         try:
-             self._get_tsne_repr(inputs, frac)
-             self._viz_tsne_repr(c=self.classification)
-
-             self._viz_radviz(self.data, "class", "Radviz Visualization of Latent Space")
-             self._viz_radviz(self.data_input, "class", "Radviz Visualization of Input Data")
-         except ValueError:
-             warnings.warn(
-                 "Some functions or processes will not be executed for regression problems.",
-                 UserWarning,
-             )
-
-         return self._statistics(self.data_input)
-
-     def _prepare_inputs(self, inputs: np.ndarray, frac: float) -> np.ndarray:
-         """
-         Prepare the input data, possibly selecting a fraction of it.
-
-         Parameters
-         ----------
-         inputs : `np.ndarray`
-             The input data.
-         frac : `float`
-             Fraction of data to use.
-
-         Returns
-         -------
-         `np.ndarray` : The prepared input data.
-         """
-         if frac:
-             n = int(frac * self.inputs.shape[0])
-             indexes = np.random.choice(np.arange(inputs.shape[0]), n, replace=False)
-             inputs = inputs[indexes]
-         inputs[np.isnan(inputs)] = 0.0
-         return inputs
-
-     def _encode_decode(self, inputs: np.ndarray) -> tuple:
-         """
-         Perform encoding and decoding on the input data.
-
-         Parameters
-         ----------
-         inputs : `np.ndarray`
-             The input data.
-
-         Returns
-         -------
-         `tuple` : The encoded and reconstructed data.
-         """
-         try:
-             mean, log_var = self.model.encoder(inputs)
-             encoded = sampling(mean, log_var)
-         except:
-             encoded = self.model.encoder(inputs)
-         reconstructed = self.model.decoder(encoded)
-         return encoded, reconstructed
-
-     def _visualize_data(
-         self, inputs: np.ndarray, reconstructed: np.ndarray, cmap: str, aspect: str
-     ) -> None:
-         """
-         Visualize the original data and the reconstructed data.
-
-         Parameters
-         ----------
-         inputs : `np.ndarray`
-             The input data.
-         reconstructed : `np.ndarray`
-             The reconstructed data.
-         cmap : `str`
-             The colormap for visualization.
-         aspect : `str`
-             Aspect ratio for the visualization.
-
-         Returns
-         -------
-         `None`
-         """
-         ax = plt.subplot(1, 2, 1)
-         plt.imshow(inputs, cmap=cmap, aspect=aspect)
-         plt.colorbar()
-         plt.title("Original Data")
-
-         plt.subplot(1, 2, 2, sharex=ax, sharey=ax)
-         plt.imshow(reconstructed, cmap=cmap, aspect=aspect)
-         plt.colorbar()
-         plt.title("Decoder Layer Reconstruction")
-         plt.show()
-
-     def _prepare_data_for_analysis(
-         self,
-         inputs: np.ndarray,
-         reconstructed: np.ndarray,
-         encoded: np.ndarray,
-         y_labels: List[str],
-     ) -> None:
-         """
-         Prepare data for statistical analysis.
-
-         Parameters
-         ----------
-         inputs : `np.ndarray`
-             The input data.
-         reconstructed : `np.ndarray`
-             The reconstructed data.
-         encoded : `np.ndarray`
-             The encoded data.
-         y_labels : `List[str]`
-             The labels of features.
-
-         Returns
-         -------
-         `None`
-         """
-         self.classification = (
-             self.model.classifier(tf.concat([reconstructed, encoded], axis=1))
-             .numpy()
-             .argmax(axis=1)
-         )
-
-         self.data = pd.DataFrame(encoded, columns=[f"Feature {i}" for i in range(encoded.shape[1])])
-         self.data_input = pd.DataFrame(
-             inputs,
-             columns=(
-                 [f"Feature {i}" for i in range(inputs.shape[1])] if y_labels is None else y_labels
-             ),
-         )
-
-         self.data["class"] = self.classification
-         self.data_input["class"] = self.classification
-
-     def _get_tsne_repr(self, inputs: np.ndarray = None, frac: float = None) -> None:
-         """
-         Perform t-SNE dimensionality reduction on the input data.
-
-         Parameters
-         ----------
-         inputs : `np.ndarray`
-             The input data.
-         frac : `float`
-             Fraction of data to use.
-
-         Returns
-         -------
-         `None`
-         """
-         if inputs is None:
-             inputs = self.inputs.copy()
-             if frac:
-                 n = int(frac * self.inputs.shape[0])
-                 indexes = np.random.choice(np.arange(inputs.shape[0]), n, replace=False)
-                 inputs = inputs[indexes]
-         inputs[np.isnan(inputs)] = 0.0
-         self.latent_representations = inputs @ self.encoder_weights
-
-         tsne = TSNE(n_components=2)
-         self.reduced_data_tsne = tsne.fit_transform(self.latent_representations)
-
-     def _viz_tsne_repr(self, **kwargs) -> None:
-         """
-         Visualize the t-SNE representation of the latent space.
-
-         Parameters
-         ----------
-         **kwargs : `dict`
-             Additional keyword arguments for customization.
-
-         Returns
-         -------
-         `None`
-         """
-         c = kwargs.get("c", None)
-         self.colors = (
-             kwargs.get("colors", self.sorted_names[: len(np.unique(c))]) if c is not None else None
-         )
-
-         plt.scatter(
-             self.reduced_data_tsne[:, 0],
-             self.reduced_data_tsne[:, 1],
-             cmap=matplotlib.colors.ListedColormap(self.colors) if c is not None else None,
-             c=c,
-         )
-
-         if c is not None:
-             cb = plt.colorbar()
-             loc = np.arange(0, max(c), max(c) / float(len(self.colors)))
-             cb.set_ticks(loc)
-             cb.set_ticklabels(np.unique(c))
-
-         plt.title("t-SNE Visualization of Latent Space")
-         plt.xlabel("t-SNE 1")
-         plt.ylabel("t-SNE 2")
-         plt.show()
-
-     def _viz_radviz(self, data: pd.DataFrame, color_column: str, title: str) -> None:
-         """
-         Visualize the data using RadViz.
-
-         Parameters
-         ----------
-         data : `pd.DataFrame`
-             The data to visualize.
-         color_column : `str`
-             The column to use for coloring.
-         title : `str`
-             The title of the plot.
-
-         Returns
-         -------
-         `None`
-         """
-         data_normalized = data.copy(deep=True)
-         data_normalized.iloc[:, :-1] = (
-             2.0
-             * (data_normalized.iloc[:, :-1] - data_normalized.iloc[:, :-1].min())
-             / (data_normalized.iloc[:, :-1].max() - data_normalized.iloc[:, :-1].min())
-             - 1
-         )
-         radviz(data_normalized, color_column, color=self.colors)
-         plt.title(title)
-         plt.show()
-
-     def _viz_weights(
-         self, cmap: str = "viridis", aspect: str = "auto", highlight: bool = True, **kwargs
-     ) -> None:
-         """
-         Visualize the encoder layer weights of the model.
-
-         Parameters
-         ----------
-         cmap : `str`, optional
-             The colormap for visualization (default is `"viridis"`).
-         aspect : `str`, optional
-             Aspect ratio for the visualization (default is `"auto"`).
-         highlight : `bool`, optional
-             Whether to highlight the maximum weights (default is `True`).
-         **kwargs : `dict`, optional
-             Additional keyword arguments for customization.
-
-         Returns
-         -------
-         `None`
-         """
-         title = kwargs.get("title", "Encoder Layer Weights (Dense Layer)")
-         y_labels = kwargs.get("y_labels", None)
-         cmap_highlight = kwargs.get("cmap_highlight", "Pastel1")
-         highlight_mask = np.zeros_like(self.encoder_weights, dtype=bool)
-
-         plt.imshow(self.encoder_weights, cmap=cmap, aspect=aspect)
-         plt.colorbar()
-         plt.title(title)
-         if y_labels is not None:
-             plt.yticks(ticks=np.arange(self.encoder_weights.shape[0]), labels=y_labels)
-         if highlight:
-             for i, j in enumerate(self.encoder_weights.argmax(axis=1)):
-                 highlight_mask[i, j] = True
-             plt.imshow(
-                 np.ma.masked_where(~highlight_mask, self.encoder_weights),
-                 cmap=cmap_highlight,
-                 alpha=0.5,
-                 aspect=aspect,
-             )
-         plt.show()
-
-     def _statistics(self, data_input: DataFrame) -> DataFrame:
-         """
-         Compute statistical summaries of the input data.
-
-         Parameters
-         ----------
-         data_input : `DataFrame`
-             The data to compute statistics for.
-
-         Returns
-         -------
-         `DataFrame` : The statistical summary of the input data.
-         """
-         data = data_input.copy(deep=True)
-
-         if not pd.api.types.is_string_dtype(data["class"]):
-             data["class"] = data["class"].astype(str)
-
-         data.ffill(inplace=True)
-         grouped_data = data.groupby("class")
-
-         numerical_stats = grouped_data.agg(["mean", "min", "max", "std", "median"])
-         numerical_stats.columns = ["_".join(col).strip() for col in numerical_stats.columns.values]
-
-         def get_mode(x):
-             mode_series = x.mode()
-             return mode_series.iloc[0] if not mode_series.empty else None
-
-         mode_stats = grouped_data.apply(get_mode, include_groups=False)
-         mode_stats.columns = [f"{col}_mode" for col in mode_stats.columns]
-         combined_stats = pd.concat([numerical_stats, mode_stats], axis=1)
-
-         return combined_stats.T
-
-
- ########################################################################################
-
- if __name__ == "__main__":
-     # Example usage
-     import pandas as pd
-     from sklearn.datasets import load_iris
-     from sklearn.preprocessing import OneHotEncoder
-
-     # Load the dataset
-     iris = load_iris()
-
-     # Convert to a DataFrame for easy exploration
-     iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
-     iris_df["species"] = iris.target
-
-     X = iris_df.drop(columns="species")
-     y_labels = X.columns
-     X = X.values
-     y = iris_df["species"].values
-
-     X = np.asarray(X).astype(np.float32)
-
-     encoder = OneHotEncoder()
-     y = encoder.fit_transform(y.reshape(-1, 1)).toarray()
-     y = np.asarray(y).astype(np.float32)
-
-     model = AutoClassifier(
-         input_shape_parm=X.shape[1],
-         num_classes=3,
-         units=27,
-         activation="tanh",
-         num_layers=2,
-         dropout=0.2,
-     )
-     model.compile(
-         optimizer="adam",
-         loss=tf.keras.losses.CategoricalCrossentropy(),
-         metrics=[tf.keras.metrics.F1Score(threshold=0.5)],
-     )
-     model.fit(X, y, epochs=50, validation_split=0.2)
-
-     insights = GetInsights(model, X)
-     summary = insights.predictor_analyzer(frac=1.0, y_labels=y_labels)
-     insights._get_tsne_repr()
-     insights._viz_tsne_repr()
-     insights._viz_tsne_repr(c=iris_df["species"])
-     insights._viz_weights()
-     print(summary)
likelihood/models/deep/predictor.py ADDED
@@ -0,0 +1,804 @@
+ import random
+ import warnings
+ from typing import List
+
+ import matplotlib
+ import matplotlib.colors as mcolors
+ import matplotlib.pyplot as plt
+ import networkx as nx
+ import numpy as np
+ import pandas as pd
+ import tensorflow as tf
+ from IPython.display import HTML, display
+ from matplotlib import cm
+ from matplotlib.colors import Normalize
+ from pandas.core.frame import DataFrame
+ from pandas.plotting import radviz
+ from sklearn.manifold import TSNE
+ from tensorflow.keras.layers import InputLayer
+
+ from likelihood.models.deep.autoencoders import AutoClassifier, sampling
+
+
+ class GetInsights:
+     """
+     A class to analyze the output of a neural network model, including visualizations
+     of the weights, t-SNE representation, and feature statistics.
+
+     Parameters
+     ----------
+     model : `AutoClassifier`
+         The trained model to analyze.
+     inputs : `np.ndarray`
+         The input data for analysis.
+     """
+
+     def __init__(self, model: AutoClassifier, inputs: np.ndarray) -> None:
+         """
+         Initializes the GetInsights class.
+
+         Parameters
+         ----------
+         model : `AutoClassifier`
+             The trained model to analyze.
+         inputs : `np.ndarray`
+             The input data for analysis.
+         """
+         self.inputs = inputs
+         self.model = model
+
+         self.encoder_layer = (
+             self.model.encoder.layers[1]
+             if isinstance(self.model.encoder.layers[0], InputLayer)
+             else self.model.encoder.layers[0]
+         )
+         self.decoder_layer = self.model.decoder.layers[0]
+
+         self.encoder_weights = self.encoder_layer.get_weights()[0]
+         self.decoder_weights = self.decoder_layer.get_weights()[0]
+
+         self.sorted_names = self._generate_sorted_color_names()
+
+     def _generate_sorted_color_names(self) -> list:
+         """
+         Generate sorted color names based on their HSV values.
+
+         Parameters
+         ----------
+         `None`
+
+         Returns
+         -------
+         `list` : Sorted color names.
+         """
+         colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)
+         by_hsv = sorted(
+             (tuple(mcolors.rgb_to_hsv(mcolors.to_rgba(color)[:3])), name)
+             for name, color in colors.items()
+         )
+         sorted_names = [name for hsv, name in by_hsv if hsv[1] > 0.4 and hsv[2] >= 0.4]
+         random.shuffle(sorted_names)
+         return sorted_names
+
+     def render_html_report(
+         self,
+         frac: float = 0.2,
+         top_k: int = 5,
+         threshold_factor: float = 1.0,
+         max_rows: int = 5,
+         **kwargs,
+     ) -> None:
+         """
+         Generate and display an embedded HTML report in a Jupyter Notebook cell.
+         """
+         display(HTML("<h2 style='margin-top:20px;'>📊 Predictor Analysis</h2>"))
+         display(
+             HTML(
+                 "<p>This section visualizes how the model predicts the data. "
+                 "You will see original inputs, reconstructed outputs, and analyses such as t-SNE "
+                 "that reduce dimensionality to visualize latent space clustering.</p>"
+             )
+         )
+         stats_df = self.predictor_analyzer(frac=frac, **kwargs)
+
+         display(HTML("<h2 style='margin-top:30px;'>🔁 Encoder-Decoder Graph</h2>"))
+         display(
+             HTML(
+                 "<p>This visualization displays the connections between layers in the encoder and decoder. "
+                 "Edges with the strongest weights are highlighted to emphasize influential features "
+                 "in the model's transformation.</p>"
+             )
+         )
+         self.viz_encoder_decoder_graphs(threshold_factor=threshold_factor, top_k=top_k)
+
+         display(HTML("<h2 style='margin-top:30px;'>🧠 Classifier Layer Graphs</h2>"))
+         display(
+             HTML(
+                 "<p>This visualization shows how features propagate through each dense layer in the classifier. "
+                 "Only the strongest weighted connections are shown to highlight influential paths through the network.</p>"
+             )
+         )
+         self.viz_classifier_graphs(threshold_factor=threshold_factor, top_k=top_k)
+
+         display(HTML("<h2 style='margin-top:30px;'>📈 Statistical Summary</h2>"))
+         display(
+             HTML(
+                 "<p>This table summarizes feature statistics grouped by predicted classes, "
+                 "including means, standard deviations, and modes, providing insight into "
+                 "feature distributions across different classes.</p>"
+             )
+         )
+
+         if max_rows is not None and max_rows > 0:
+             stats_to_display = stats_df.head(max_rows)
+         else:
+             stats_to_display = stats_df
+
+         display(
+             stats_to_display.style.set_table_attributes(
+                 "style='display:inline;border-collapse:collapse;'"
+             )
+             .set_caption("Feature Summary per Class")
+             .set_properties(
+                 **{
+                     "border": "1px solid #ddd",
+                     "padding": "8px",
+                     "text-align": "center",
+                 }
+             )
+         )
+
+         display(
+             HTML(
+                 "<p style='color: gray; margin-top:30px;'>Report generated with "
+                 "<code>GetInsights</code> class. For detailed customization, extend "
+                 "<code>render_html_report</code>.</p>"
+             )
+         )
+
+     def viz_classifier_graphs(self, threshold_factor=1.0, top_k=5, save_path=None):
+         """
+         Visualize all Dense layers in self.model.classifier as a single directed graph,
+         connecting each Dense layer to the next.
+         """
+
+         def get_top_k_edges(weights, src_prefix, dst_prefix, k):
+             flat_weights = np.abs(weights.flatten())
+             indices = np.argpartition(flat_weights, -k)[-k:]
+             top_k_flat_indices = indices[np.argsort(-flat_weights[indices])]
+             top_k_edges = []
+
+             for flat_index in top_k_flat_indices:
+                 i, j = np.unravel_index(flat_index, weights.shape)
+                 top_k_edges.append((f"{src_prefix}_{i}", f"{dst_prefix}_{j}", weights[i, j]))
+             return top_k_edges
+
+         def add_dense_layer_edges(G, weights, layer_idx, threshold_factor, top_k):
+             src_prefix = f"L{layer_idx}"
+             dst_prefix = f"L{layer_idx + 1}"
+             input_nodes = [f"{src_prefix}_{i}" for i in range(weights.shape[0])]
+             output_nodes = [f"{dst_prefix}_{j}" for j in range(weights.shape[1])]
+
+             G.add_nodes_from(input_nodes + output_nodes)
+
+             abs_weights = np.abs(weights)
+             threshold = threshold_factor * np.mean(abs_weights)
+             top_k_edges = get_top_k_edges(weights, src_prefix, dst_prefix, top_k)
+             top_k_set = set((u, v) for u, v, _ in top_k_edges)
+
+             for i, src in enumerate(input_nodes):
+                 for j, dst in enumerate(output_nodes):
+                     w = weights[i, j]
+                     if abs(w) > threshold:
+                         G.add_edge(src, dst, weight=w, highlight=(src, dst) in top_k_set)
+
+         def compute_layout(G):
+             pos = {}
+             layer_nodes = {}
+
+             for node in G.nodes():
+                 layer_idx = int(node.split("_")[0][1:])
+                 layer_nodes.setdefault(layer_idx, []).append(node)
+
+             for layer_idx, nodes in sorted(layer_nodes.items()):
+                 y_positions = np.linspace(1, -1, len(nodes))
+                 for y, node in zip(y_positions, nodes):
+                     pos[node] = (layer_idx * 2, y)
+
+             return pos
+
+         def draw_graph(G, pos, title, save_path=None):
+             weights = [abs(G[u][v]["weight"]) for u, v in G.edges()]
+             if not weights:
+                 print("No edges to draw.")
+                 return
+
+             norm = Normalize(vmin=min(weights), vmax=max(weights))
+             cmap = cm.get_cmap("coolwarm")
+
+             edge_colors = [cmap(norm(G[u][v]["weight"])) for u, v in G.edges()]
+             edge_widths = [1.0 + 2.0 * norm(abs(G[u][v]["weight"])) for u, v in G.edges()]
+
+             fig, ax = plt.subplots(figsize=(12, 8))
+
+             nx.draw(
+                 G,
+                 pos,
+                 ax=ax,
+                 with_labels=True,
+                 node_color="lightgray",
+                 node_size=1000,
+                 font_size=8,
+                 edge_color=edge_colors,
+                 width=edge_widths,
+                 arrows=True,
+             )
+
+             ax.set_title(title, fontsize=14)
+
+             sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
+             sm.set_array([])
+             plt.colorbar(sm, ax=ax, orientation="vertical", label="Edge Weight")
+
+             plt.tight_layout()
+             if save_path:
+                 plt.savefig(save_path)
+             plt.show()
+
+         dense_layers = [
+             layer
+             for layer in self.model.classifier.layers
+             if isinstance(layer, tf.keras.layers.Dense)
+         ]
+
+         if len(dense_layers) < 1:
+             print("No Dense layers found in classifier.")
+             return
+
+         G = nx.DiGraph()
+         for idx, layer in enumerate(dense_layers):
+             weights = layer.get_weights()[0]
+             add_dense_layer_edges(G, weights, idx, threshold_factor, top_k)
+
+         pos = compute_layout(G)
+         draw_graph(G, pos, "Classifier Dense Layers Graph", save_path)
+
+     def viz_encoder_decoder_graphs(self, threshold_factor=1.0, top_k=5, save_path=None):
+         """
+         Visualize Dense layers in self.model.encoder and self.model.decoder as directed graphs.
+         """
+
+         def get_top_k_edges(weights, labels_src, labels_dst_prefix, k):
+             flat_weights = np.abs(weights.flatten())
+             indices = np.argpartition(flat_weights, -k)[-k:]
+             top_k_flat_indices = indices[np.argsort(-flat_weights[indices])]
+             top_k_edges = []
+             for flat_index in top_k_flat_indices:
+                 i, j = np.unravel_index(flat_index, weights.shape)
+                 src_label = labels_src[i] if isinstance(labels_src, list) else f"{labels_src}_{i}"
+                 dst_label = f"{labels_dst_prefix}_{j}"
+                 top_k_edges.append((src_label, dst_label, weights[i, j]))
+             return top_k_edges
+
+         def add_layer_to_graph(
+             G, weights, labels_src, labels_dst_prefix, x_offset, top_k_set, threshold
+         ):
+             output_nodes = [f"{labels_dst_prefix}_{j}" for j in range(weights.shape[1])]
+
+             for node in labels_src + output_nodes:
+                 if node not in G:
+                     G.add_node(node, x=x_offset if node in labels_src else x_offset + 1)
+
+             for i, src in enumerate(labels_src):
+                 for j, dst in enumerate(output_nodes):
+                     w = weights[i, j]
+                     if abs(w) > threshold:
+                         G.add_edge(src, dst, weight=w, highlight=(src, dst) in top_k_set)
+             return output_nodes
+
+         def layout_graph(G):
+             pos = {}
+             layers = {}
+             for node, data in G.nodes(data=True):
+                 x = data["x"]
+                 layers.setdefault(x, []).append(node)
+
+             for x in sorted(layers):
+                 nodes = layers[x]
+                 y_positions = np.linspace(1, -1, len(nodes))
+                 for y, node in zip(y_positions, nodes):
+                     pos[node] = (x, y)
+             return pos
+
+         def draw_graph(G, title, ax):
+             weights = [abs(G[u][v]["weight"]) for u, v in G.edges()]
+             if not weights:
+                 return
+
+             norm = Normalize(vmin=min(weights), vmax=max(weights))
+             cmap = cm.get_cmap("coolwarm")
+
+             edge_colors = [cmap(norm(G[u][v]["weight"])) for u, v in G.edges()]
+             edge_widths = [1.0 + 2.0 * norm(abs(G[u][v]["weight"])) for u, v in G.edges()]
+
+             pos = layout_graph(G)
+             nx.draw(
+                 G,
+                 pos,
+                 ax=ax,
+                 with_labels=True,
+                 node_color="lightgray",
+                 node_size=1000,
+                 font_size=8,
+                 edge_color=edge_colors,
+                 width=edge_widths,
+                 arrows=True,
+             )
+
+             ax.set_title(title, fontsize=12)
+             sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
+             sm.set_array([])
+             plt.colorbar(sm, ax=ax, orientation="vertical", label="Edge Weight")
+
+         def build_graph(layers, label_prefix, input_labels=None):
+             G = nx.DiGraph()
+             x_offset = 0
+             prev_labels = input_labels or [
+                 f"{label_prefix}0_{i}" for i in range(layers[0].get_weights()[0].shape[0])
+             ]
+
+             for idx, layer in enumerate(layers):
+                 weights = layer.get_weights()[0]
+                 label = f"{label_prefix}{idx+1}"
+                 threshold = threshold_factor * np.mean(np.abs(weights))
+                 top_k_edges = get_top_k_edges(weights, prev_labels, label, top_k)
+                 top_k_set = set((src, dst) for src, dst, _ in top_k_edges)
+
+                 prev_labels = add_layer_to_graph(
+                     G, weights, prev_labels, label, x_offset, top_k_set, threshold
+                 )
+                 x_offset += 2
+
+             return G
+
+         encoder_layers = [
+             l for l in self.model.encoder.layers if isinstance(l, tf.keras.layers.Dense)
+         ]
+         decoder_layers = [
+             l for l in self.model.decoder.layers if isinstance(l, tf.keras.layers.Dense)
+         ]
+
+         if not encoder_layers and not decoder_layers:
+             print("No Dense layers found in encoder or decoder.")
+             return
+
+         n_graphs = int(bool(encoder_layers)) + int(bool(decoder_layers))
+         fig, axes = plt.subplots(1, n_graphs, figsize=(7 * n_graphs, 6), squeeze=False)
+
+         col = 0
+         if encoder_layers:
+             input_labels = (
+                 self.y_labels
+                 if self.y_labels
+                 and len(self.y_labels) == encoder_layers[0].get_weights()[0].shape[0]
+                 else None
+             )
+             encoder_graph = build_graph(encoder_layers, "E", input_labels)
+             draw_graph(encoder_graph, "Encoder", axes[0][col])
+             col += 1
+
+         if decoder_layers:
+             decoder_graph = build_graph(decoder_layers, "D")
+             draw_graph(decoder_graph, "Decoder", axes[0][col])
+
+         fig.suptitle("Encoder & Decoder Dense Layer Graphs", fontsize=15)
+         plt.tight_layout(rect=[0, 0, 1, 0.95])
+
+         if save_path:
+             plt.savefig(save_path)
+         plt.show()
+
+         if encoder_layers:
+             weights = encoder_layers[0].get_weights()[0]
+             importances = np.abs(weights).mean(axis=1)
+             sorted_idx = np.argsort(-importances)
+             xticks = [
+                 (
+                     self.y_labels[i]
+                     if self.y_labels and len(self.y_labels) == weights.shape[0]
+                     else f"Input_{i}"
+                 )
+                 for i in sorted_idx
+             ]
+
+             plt.figure(figsize=(10, 4))
+             plt.bar(range(len(importances)), importances[sorted_idx], color="skyblue")
+             plt.xticks(range(len(importances)), xticks, rotation=45, ha="right")
+             plt.title("Feature Importances (Encoder Input Layer)", fontsize=13)
+             plt.ylabel("Mean |Weight|")
+             plt.tight_layout()
+             plt.show()
+
+     def predictor_analyzer(
+         self,
+         frac: float = None,
+         cmap: str = "viridis",
+         aspect: str = "auto",
+         highlight: bool = True,
+         **kwargs,
+     ) -> None:
+         """
+         Analyze the model's predictions and visualize data.
+
+         Parameters
+         ----------
+         frac : `float`, optional
+             Fraction of data to use for analysis (default is `None`).
+         cmap : `str`, optional
+             The colormap for visualization (default is `"viridis"`).
+         aspect : `str`, optional
+             Aspect ratio for the visualization (default is `"auto"`).
+         highlight : `bool`, optional
+             Whether to highlight the maximum weights (default is `True`).
+         **kwargs : `dict`, optional
+             Additional keyword arguments for customization.
+
+         Returns
+         -------
+         `DataFrame` : The statistical summary of the input data.
+         """
+         self._viz_weights(cmap=cmap, aspect=aspect, highlight=highlight, **kwargs)
+         inputs = self.inputs.copy()
+         inputs = self._prepare_inputs(inputs, frac)
+         self.y_labels = kwargs.get("y_labels", None)
+         encoded, reconstructed = self._encode_decode(inputs)
+         self._visualize_data(inputs, reconstructed, cmap, aspect)
+         self._prepare_data_for_analysis(inputs, reconstructed, encoded, self.y_labels)
+
+         try:
+             self._get_tsne_repr(inputs, frac)
+             self._viz_tsne_repr(c=self.classification)
+
+             self._viz_radviz(self.data, "class", "Radviz Visualization of Latent Space")
+             self._viz_radviz(self.data_input, "class", "Radviz Visualization of Input Data")
+         except ValueError:
+             warnings.warn(
+                 "Some functions or processes will not be executed for regression problems.",
+                 UserWarning,
+             )
+
+         return self._statistics(self.data_input)
+
+     def _prepare_inputs(self, inputs: np.ndarray, frac: float) -> np.ndarray:
+         """
+         Prepare the input data, possibly selecting a fraction of it.
+
+         Parameters
+         ----------
+         inputs : `np.ndarray`
+             The input data.
+         frac : `float`
+             Fraction of data to use.
+
+         Returns
+         -------
+         `np.ndarray` : The prepared input data.
+         """
+         if frac:
+             n = int(frac * self.inputs.shape[0])
+             indexes = np.random.choice(np.arange(inputs.shape[0]), n, replace=False)
+             inputs = inputs[indexes]
+         inputs[np.isnan(inputs)] = 0.0
+         return inputs
+
+     def _encode_decode(self, inputs: np.ndarray) -> tuple:
+         """
+         Perform encoding and decoding on the input data.
+
+         Parameters
+         ----------
+         inputs : `np.ndarray`
+             The input data.
+
+         Returns
+         -------
+         `tuple` : The encoded and reconstructed data.
+         """
+         try:
+             mean, log_var = self.model.encoder(inputs)
+             encoded = sampling(mean, log_var)
+         except:
+             encoded = self.model.encoder(inputs)
+         reconstructed = self.model.decoder(encoded)
+         return encoded, reconstructed
+
+     def _visualize_data(
+         self, inputs: np.ndarray, reconstructed: np.ndarray, cmap: str, aspect: str
+     ) -> None:
+         """
+         Visualize the original data and the reconstructed data.
+
+         Parameters
+         ----------
+         inputs : `np.ndarray`
+             The input data.
+         reconstructed : `np.ndarray`
+             The reconstructed data.
+         cmap : `str`
+             The colormap for visualization.
+         aspect : `str`
+             Aspect ratio for the visualization.
+
+         Returns
+         -------
+         `None`
+         """
+         ax = plt.subplot(1, 2, 1)
+         plt.imshow(inputs, cmap=cmap, aspect=aspect)
+         plt.colorbar()
+         plt.title("Original Data")
+
+         plt.subplot(1, 2, 2, sharex=ax, sharey=ax)
+         plt.imshow(reconstructed, cmap=cmap, aspect=aspect)
+         plt.colorbar()
+         plt.title("Decoder Layer Reconstruction")
+         plt.show()
+
+     def _prepare_data_for_analysis(
+         self,
+         inputs: np.ndarray,
+         reconstructed: np.ndarray,
+         encoded: np.ndarray,
+         y_labels: List[str],
+     ) -> None:
+         """
+         Prepare data for statistical analysis.
+
+         Parameters
+         ----------
+         inputs : `np.ndarray`
+             The input data.
+         reconstructed : `np.ndarray`
+             The reconstructed data.
+         encoded : `np.ndarray`
+             The encoded data.
+         y_labels : `List[str]`
+             The labels of features.
+
+         Returns
+         -------
+         `None`
+         """
+         self.classification = (
+             self.model.classifier(tf.concat([reconstructed, encoded], axis=1))
+             .numpy()
+             .argmax(axis=1)
+         )
+
+         self.data = pd.DataFrame(encoded, columns=[f"Feature {i}" for i in range(encoded.shape[1])])
+         self.data_input = pd.DataFrame(
+             inputs,
+             columns=(
+                 [f"Feature {i}" for i in range(inputs.shape[1])] if y_labels is None else y_labels
+             ),
+         )
+
+         self.data["class"] = self.classification
+         self.data_input["class"] = self.classification
+
+     def _get_tsne_repr(self, inputs: np.ndarray = None, frac: float = None) -> None:
+         """
+         Perform t-SNE dimensionality reduction on the input data.
+
+         Parameters
+         ----------
+         inputs : `np.ndarray`
+             The input data.
+         frac : `float`
+             Fraction of data to use.
+
+         Returns
+         -------
+         `None`
+         """
+         if inputs is None:
+             inputs = self.inputs.copy()
+             if frac:
+                 n = int(frac * self.inputs.shape[0])
+                 indexes = np.random.choice(np.arange(inputs.shape[0]), n, replace=False)
+                 inputs = inputs[indexes]
+         inputs[np.isnan(inputs)] = 0.0
+         self.latent_representations = inputs @ self.encoder_weights
+
+         tsne = TSNE(n_components=2)
+         self.reduced_data_tsne = tsne.fit_transform(self.latent_representations)
+
+     def _viz_tsne_repr(self, **kwargs) -> None:
+         """
+         Visualize the t-SNE representation of the latent space.
+
+         Parameters
+         ----------
+         **kwargs : `dict`
+             Additional keyword arguments for customization.
+
+         Returns
+         -------
+         `None`
+         """
+         c = kwargs.get("c", None)
+         self.colors = (
+             kwargs.get("colors", self.sorted_names[: len(np.unique(c))]) if c is not None else None
+         )
+
+         plt.scatter(
+             self.reduced_data_tsne[:, 0],
+             self.reduced_data_tsne[:, 1],
+             cmap=matplotlib.colors.ListedColormap(self.colors) if c is not None else None,
+             c=c,
+         )
+
+         if c is not None:
+             cb = plt.colorbar()
+             loc = np.arange(0, max(c), max(c) / float(len(self.colors)))
+             cb.set_ticks(loc)
+             cb.set_ticklabels(np.unique(c))
+
+         plt.title("t-SNE Visualization of Latent Space")
+         plt.xlabel("t-SNE 1")
+         plt.ylabel("t-SNE 2")
+         plt.show()
+
+     def _viz_radviz(self, data: pd.DataFrame, color_column: str, title: str) -> None:
+         """
+         Visualize the data using RadViz.
+
+         Parameters
+         ----------
+         data : `pd.DataFrame`
+             The data to visualize.
+         color_column : `str`
+             The column to use for coloring.
+         title : `str`
+             The title of the plot.
+
+         Returns
+         -------
+         `None`
+         """
+         data_normalized = data.copy(deep=True)
+         data_normalized.iloc[:, :-1] = (
+             2.0
+             * (data_normalized.iloc[:, :-1] - data_normalized.iloc[:, :-1].min())
+             / (data_normalized.iloc[:, :-1].max() - data_normalized.iloc[:, :-1].min())
+             - 1
+         )
+         radviz(data_normalized, color_column, color=self.colors)
+         plt.title(title)
+         plt.show()
+
+     def _viz_weights(
+         self, cmap: str = "viridis", aspect: str = "auto", highlight: bool = True, **kwargs
+     ) -> None:
+         """
+         Visualize the encoder layer weights of the model.
+
+         Parameters
+         ----------
+         cmap : `str`, optional
+             The colormap for visualization (default is `"viridis"`).
+         aspect : `str`, optional
+             Aspect ratio for the visualization (default is `"auto"`).
+         highlight : `bool`, optional
+             Whether to highlight the maximum weights (default is `True`).
+         **kwargs : `dict`, optional
+             Additional keyword arguments for customization.
+
+         Returns
+         -------
+         `None`
+         """
+         title = kwargs.get("title", "Encoder Layer Weights (Dense Layer)")
+         y_labels = kwargs.get("y_labels", None)
+         cmap_highlight = kwargs.get("cmap_highlight", "Pastel1")
+         highlight_mask = np.zeros_like(self.encoder_weights, dtype=bool)
+
+         plt.imshow(self.encoder_weights, cmap=cmap, aspect=aspect)
+         plt.colorbar()
+         plt.title(title)
+         if y_labels is not None:
+             plt.yticks(ticks=np.arange(self.encoder_weights.shape[0]), labels=y_labels)
+         if highlight:
+             for i, j in enumerate(self.encoder_weights.argmax(axis=1)):
+                 highlight_mask[i, j] = True
+             plt.imshow(
+                 np.ma.masked_where(~highlight_mask, self.encoder_weights),
+                 cmap=cmap_highlight,
+                 alpha=0.5,
+                 aspect=aspect,
+             )
+         plt.show()
+
+     def _statistics(self, data_input: DataFrame) -> DataFrame:
+         """
+         Compute statistical summaries of the input data.
+
+         Parameters
+         ----------
+         data_input : `DataFrame`
+             The data to compute statistics for.
+
+         Returns
+         -------
+         `DataFrame` : The statistical summary of the input data.
+         """
+         data = data_input.copy(deep=True)
+
+         if not pd.api.types.is_string_dtype(data["class"]):
+             data["class"] = data["class"].astype(str)
+
+         data.ffill(inplace=True)
+         grouped_data = data.groupby("class")
+
+         numerical_stats = grouped_data.agg(["mean", "min", "max", "std", "median"])
+         numerical_stats.columns = ["_".join(col).strip() for col in numerical_stats.columns.values]
+
+         def get_mode(x):
+             mode_series = x.mode()
+             return mode_series.iloc[0] if not mode_series.empty else None
+
+         mode_stats = grouped_data.apply(get_mode, include_groups=False)
+         mode_stats.columns = [f"{col}_mode" for col in mode_stats.columns]
+         combined_stats = pd.concat([numerical_stats, mode_stats], axis=1)
+
+         return combined_stats.T
+
+
+ ########################################################################################
+
+ if __name__ == "__main__":
+     # Example usage
+     import pandas as pd
+     from sklearn.datasets import load_iris
+     from sklearn.preprocessing import OneHotEncoder
+
+     # Load the dataset
+     iris = load_iris()
+
+     # Convert to a DataFrame for easy exploration
+     iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
+     iris_df["species"] = iris.target
+
+     X = iris_df.drop(columns="species")
+     y_labels = X.columns
+     X = X.values
+     y = iris_df["species"].values
+
+     X = np.asarray(X).astype(np.float32)
+
+     encoder = OneHotEncoder()
+     y = encoder.fit_transform(y.reshape(-1, 1)).toarray()
+     y = np.asarray(y).astype(np.float32)
+
+     model = AutoClassifier(
+         input_shape_parm=X.shape[1],
+         num_classes=3,
+         units=27,
+         activation="tanh",
+         num_layers=2,
+         dropout=0.2,
+     )
+     model.compile(
+         optimizer="adam",
+         loss=tf.keras.losses.CategoricalCrossentropy(),
+         metrics=[tf.keras.metrics.F1Score(threshold=0.5)],
+     )
+     model.fit(X, y, epochs=50, validation_split=0.2)
+
+     insights = GetInsights(model, X)
+     summary = insights.predictor_analyzer(frac=1.0, y_labels=y_labels)
+     insights._get_tsne_repr()
+     insights._viz_tsne_repr()
+     insights._viz_tsne_repr(c=iris_df["species"])
+     insights._viz_weights()
+     print(summary)
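The new module carries GetInsights over from autoencoders.py largely unchanged (predictor_analyzer now stores y_labels on the instance) and adds three visualization entry points: render_html_report, viz_encoder_decoder_graphs, and viz_classifier_graphs. A usage sketch based on the signatures above; it assumes a Jupyter environment (for the IPython display calls) and the trained `model`, `X`, and `y_labels` from the __main__ example:

    from likelihood.models.deep.predictor import GetInsights

    insights = GetInsights(model, X)

    # Embedded HTML report: prediction visuals, encoder/decoder and classifier
    # weight graphs, and a per-class statistical summary table. y_labels is
    # forwarded to predictor_analyzer via **kwargs.
    insights.render_html_report(frac=0.2, top_k=5, threshold_factor=1.0,
                                max_rows=5, y_labels=y_labels)

    # The graph views can also be drawn individually and saved to disk;
    # the save_path file name here is hypothetical.
    insights.viz_classifier_graphs(threshold_factor=1.0, top_k=5,
                                   save_path="classifier_graph.png")

Note that viz_encoder_decoder_graphs reads self.y_labels, which is only assigned inside predictor_analyzer, so it should be called after predictor_analyzer (or after render_html_report, which invokes both in order).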
likelihood/tools/tools.py CHANGED
@@ -653,7 +653,7 @@ def cal_average(y: np.ndarray, alpha: float = 1):
  class DataScaler:
      """numpy array `scaler` and `rescaler`"""

-     __slots__ = ["dataset_", "_n", "data_scaled", "values", "transpose", "inv_fitting"]
+     __slots__ = ["dataset_", "_n", "data_scaled", "values", "inv_fitting"]

      def __init__(self, dataset: np.ndarray, n: int = 1) -> None:
          """Initializes the parameters required for scaling the data"""
@@ -695,11 +695,6 @@ class DataScaler:
              msg = "Trying to access an item at an invalid index."
              print(f"{error_type}: {msg}")
              return None
-         if self.dataset_.shape[0] > self.dataset_.shape[1]:
-             self.dataset_ = self.dataset_.T
-             self.transpose = True
-         else:
-             self.transpose = False
          for i in range(self.dataset_.shape[0]):
              if self._n != None:
                  fit = np.polyfit(xaxis, self.dataset_[i, :], self._n)
@@ -737,14 +732,13 @@ class DataScaler:
          dataset_ : `np.array`
              An array containing the rescaled data.
          """
-         if self.transpose:
-             dataset_ = dataset_.T
          for i in range(dataset_.shape[0]):
              dataset_[i, :] += 1
              dataset_[i, :] /= 2
              dataset_[i, :] = dataset_[i, :] * self.values[1][i]
              dataset_[i, :] += self.values[0][i]
-             dataset_[i, :] += self.values[2][i](range(dataset_.shape[1]))
+             if self._n != None:
+                 dataset_[i, :] += self.values[2][i](range(dataset_.shape[1]))

          return dataset_

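Two behavioral changes here: DataScaler no longer transposes datasets that have more rows than columns (and `transpose` leaves `__slots__`), and rescaling only re-applies the fitted polynomial trend when a degree `n` was actually supplied, where 1.5.5 applied it unconditionally. A small sketch of the new guard's semantics (names hypothetical; `self.values[2][i]` is assumed to be a callable fit in the spirit of np.poly1d over np.polyfit coefficients):

    import numpy as np

    n = None                              # degree passed as DataScaler(dataset, n=...)
    row = np.array([-1.0, 0.0, 1.0])      # one row mid-rescale
    trend = np.poly1d([0.5, 0.0])         # stands in for self.values[2][i]
    if n is not None:                     # 1.5.7: trend added back only when fitted
        row += trend(range(row.shape[0]))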
likelihood-1.5.7.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: likelihood
- Version: 1.5.5
+ Version: 1.5.7
  Summary: A package that performs the maximum likelihood algorithm.
  Home-page: https://github.com/jzsmoreno/likelihood/
  Author: J. A. Moreno-Guerra
@@ -49,7 +49,7 @@ Dynamic: requires-dist
  Dynamic: requires-python
  Dynamic: summary

- ![likelihood](https://raw.githubusercontent.com/RodolfoFerro/likelihood/main/likelihood.png)
+ ![likelihood](https://raw.githubusercontent.com/jzsmoreno/likelihood/main/likelihood.png)

  ![GitHub last commit](https://img.shields.io/github/last-commit/jzsmoreno/likelihood?style=for-the-badge)
  ![GitHub repo size](https://img.shields.io/github/repo-size/jzsmoreno/likelihood?style=for-the-badge)
likelihood-1.5.7.dist-info/RECORD CHANGED
@@ -8,17 +8,18 @@ likelihood/models/hmm.py,sha256=0s0gFySH1u4NjRaZDxiZ8oeTaFhFrw1x0GJxwy3dFrA,6253
  likelihood/models/regression.py,sha256=9cakyGlJCEO6WfpoKLh3GxdXQeQp7cUvJIkQ5odT0TA,9404
  likelihood/models/simulation.py,sha256=6OD2IXAnbctxtOzUJ2b9vKW7_tdGs4dQYmQQShqsioA,8443
  likelihood/models/utils.py,sha256=dvigPi_hxcs5ntfHr7Y1JvP5ULtMW3kkN0nJpS4orE8,1319
- likelihood/models/deep/__init__.py,sha256=m607FtMP2gAfPtM0mssFXMKyKOqoeYskZ_xIC6dKhr4,47
- likelihood/models/deep/autoencoders.py,sha256=0EIZwDNlZ9NCfQbhQ_KdXkkRwIjUEU-jk0l0u-J1wmA,44212
+ likelihood/models/deep/__init__.py,sha256=UV_VYhySvrNnB4a0VXYM4wK3KKF7ytjLFFfwvnaZWaA,82
+ likelihood/models/deep/autoencoders.py,sha256=9-ZOKbS02tojCufg_Fbd5_Z48pSFSqZnfZZJVohNqdk,29985
  likelihood/models/deep/gan.py,sha256=aoSaNO5LvCU62cjxA0AxvnQvE7NSFtrp1Ta4EDJchpo,10874
+ likelihood/models/deep/predictor.py,sha256=Z6GVm9ciz90cMcp4Q6Lvm-_8_9ZOxX1kBquReW2aGqM,27688
  likelihood/tools/__init__.py,sha256=N1IhMDzacsGQT2MIYBMBC0zTxes78vC_0gGrwkuPgmg,78
  likelihood/tools/figures.py,sha256=waF0NHIMrctCmaLhcuz5DMcXyRKynmn6aG0XITYCTLc,10940
  likelihood/tools/impute.py,sha256=n87Tv-xLUAdPl7BQLFcLWSsXBZbXksahyCayJWMydXc,9485
  likelihood/tools/models_tools.py,sha256=c3-vac-1MYSarYDtfR6XfVC7X_WY9auS7y2_3Z973IQ,8875
  likelihood/tools/numeric_tools.py,sha256=Hwf-lbqROqPPZ9N7eVzKIDyZxFGQdP53isWxPqpG0eo,12254
- likelihood/tools/tools.py,sha256=FyldbmYNgt4gK89BKgDsya2_EIENwZZwdbBx5pfNhj4,42281
- likelihood-1.5.5.dist-info/licenses/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
- likelihood-1.5.5.dist-info/METADATA,sha256=jtu0BJ0483cmd4DAKqqn_rsSru1-LVS2Wmj998jMkoA,2886
- likelihood-1.5.5.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
- likelihood-1.5.5.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
- likelihood-1.5.5.dist-info/RECORD,,
+ likelihood/tools/tools.py,sha256=lk9BIskjUKYQ1XVwARm9jAjHuLQ4UO68aZY8oxkzk5c,42056
+ likelihood-1.5.7.dist-info/licenses/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
+ likelihood-1.5.7.dist-info/METADATA,sha256=V8yQ5NJPbMyxOB7sICsp5QCkZ8MZhxkfS-4WCWMrFG0,2883
+ likelihood-1.5.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ likelihood-1.5.7.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
+ likelihood-1.5.7.dist-info/RECORD,,
likelihood-1.5.7.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.7.1)
+ Generator: setuptools (80.9.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
