likelihood 1.2.24__py3-none-any.whl → 1.2.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- likelihood/graph/nn.py +5 -7
- likelihood/models/deep/autoencoders.py +325 -36
- {likelihood-1.2.24.dist-info → likelihood-1.2.25.dist-info}/METADATA +15 -3
- {likelihood-1.2.24.dist-info → likelihood-1.2.25.dist-info}/RECORD +7 -7
- {likelihood-1.2.24.dist-info → likelihood-1.2.25.dist-info}/WHEEL +1 -1
- {likelihood-1.2.24.dist-info → likelihood-1.2.25.dist-info}/LICENSE +0 -0
- {likelihood-1.2.24.dist-info → likelihood-1.2.25.dist-info}/top_level.txt +0 -0
likelihood/graph/nn.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
import os
|
|
2
3
|
|
|
3
|
-
os.environ["
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
import logging
|
|
4
|
+
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
|
5
|
+
logging.getLogger("tensorflow").setLevel(logging.ERROR)
|
|
6
|
+
|
|
7
7
|
import warnings
|
|
8
8
|
from typing import List, Tuple
|
|
9
9
|
|
|
@@ -17,9 +17,7 @@ from sklearn.model_selection import train_test_split
|
|
|
17
17
|
|
|
18
18
|
from likelihood.tools import generate_feature_yaml
|
|
19
19
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
|
|
20
|
+
tf.get_logger().setLevel("ERROR")
|
|
23
21
|
|
|
24
22
|
|
|
25
23
|
def compare_similarity(arr1: np.ndarray, arr2: np.ndarray) -> int:
|
|
@@ -1,19 +1,40 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import os
|
|
3
|
+
import random
|
|
3
4
|
from functools import partial
|
|
4
5
|
from shutil import rmtree
|
|
5
6
|
|
|
6
|
-
import
|
|
7
|
+
import matplotlib
|
|
8
|
+
import matplotlib.colors as mcolors
|
|
9
|
+
import matplotlib.pyplot as plt
|
|
7
10
|
import numpy as np
|
|
8
11
|
import pandas as pd
|
|
12
|
+
from pandas.plotting import radviz
|
|
13
|
+
|
|
14
|
+
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
|
15
|
+
logging.getLogger("tensorflow").setLevel(logging.ERROR)
|
|
16
|
+
|
|
17
|
+
import warnings
|
|
18
|
+
from functools import wraps
|
|
19
|
+
|
|
20
|
+
import keras_tuner
|
|
9
21
|
import tensorflow as tf
|
|
10
22
|
from pandas.core.frame import DataFrame
|
|
23
|
+
from sklearn.manifold import TSNE
|
|
11
24
|
|
|
12
25
|
from likelihood.tools import OneHotEncoder
|
|
13
26
|
|
|
14
|
-
|
|
27
|
+
tf.get_logger().setLevel("ERROR")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def suppress_warnings(func):
    """Return a version of ``func`` that runs with every warning silenced.

    Parameters
    ----------
    func : `callable`
        The function to wrap.

    Returns
    -------
    `callable`
        A wrapper with the same metadata as ``func`` (via ``functools.wraps``)
        that forwards all arguments and returns whatever ``func`` returns.
    """

    @wraps(func)
    def _quiet_call(*call_args, **call_kwargs):
        # catch_warnings restores the previous filter state on exit, so the
        # blanket "ignore" filter only applies for the duration of this call.
        silencer = warnings.catch_warnings()
        with silencer:
            warnings.simplefilter("ignore")
            outcome = func(*call_args, **call_kwargs)
        return outcome

    return _quiet_call
|
|
17
38
|
|
|
18
39
|
|
|
19
40
|
@tf.keras.utils.register_keras_serializable(package="Custom", name="AutoClassifier")
|
|
@@ -35,7 +56,7 @@ class AutoClassifier(tf.keras.Model):
|
|
|
35
56
|
from_config(cls, config): Recreates an instance of AutoClassifier from its configuration.
|
|
36
57
|
"""
|
|
37
58
|
|
|
38
|
-
def __init__(self, input_shape_parm, num_classes, units, activation):
|
|
59
|
+
def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
|
|
39
60
|
"""
|
|
40
61
|
Initializes an AutoClassifier instance with the given parameters.
|
|
41
62
|
|
|
@@ -49,6 +70,15 @@ class AutoClassifier(tf.keras.Model):
|
|
|
49
70
|
The number of neurons in each hidden layer.
|
|
50
71
|
activation : `str`
|
|
51
72
|
The type of activation function to use for the neural network layers.
|
|
73
|
+
|
|
74
|
+
Keyword Arguments:
|
|
75
|
+
----------
|
|
76
|
+
Additional keyword arguments to pass to the model.
|
|
77
|
+
|
|
78
|
+
classifier_activation : `str`
|
|
79
|
+
The activation function to use for the classifier layer. Default is "softmax". If the activation function is not a classification function, the model can be used in regression problems.
|
|
80
|
+
num_layers : `int`
|
|
81
|
+
The number of hidden layers in the classifier. Default is 1.
|
|
52
82
|
"""
|
|
53
83
|
super(AutoClassifier, self).__init__()
|
|
54
84
|
self.input_shape_parm = input_shape_parm
|
|
@@ -59,6 +89,8 @@ class AutoClassifier(tf.keras.Model):
|
|
|
59
89
|
self.encoder = None
|
|
60
90
|
self.decoder = None
|
|
61
91
|
self.classifier = None
|
|
92
|
+
self.classifier_activation = kwargs.get("classifier_activation", "softmax")
|
|
93
|
+
self.num_layers = kwargs.get("num_layers", 1)
|
|
62
94
|
|
|
63
95
|
def build(self, input_shape):
|
|
64
96
|
self.encoder = tf.keras.Sequential(
|
|
@@ -75,8 +107,14 @@ class AutoClassifier(tf.keras.Model):
|
|
|
75
107
|
]
|
|
76
108
|
)
|
|
77
109
|
|
|
78
|
-
self.classifier = tf.keras.Sequential(
|
|
79
|
-
|
|
110
|
+
self.classifier = tf.keras.Sequential()
|
|
111
|
+
if self.num_layers > 1:
|
|
112
|
+
for _ in range(self.num_layers - 1):
|
|
113
|
+
self.classifier.add(
|
|
114
|
+
tf.keras.layers.Dense(units=self.units, activation=self.activation)
|
|
115
|
+
)
|
|
116
|
+
self.classifier.add(
|
|
117
|
+
tf.keras.layers.Dense(units=self.num_classes, activation=self.classifier_activation)
|
|
80
118
|
)
|
|
81
119
|
|
|
82
120
|
def call(self, x):
|
|
@@ -92,6 +130,8 @@ class AutoClassifier(tf.keras.Model):
|
|
|
92
130
|
"num_classes": self.num_classes,
|
|
93
131
|
"units": self.units,
|
|
94
132
|
"activation": self.activation,
|
|
133
|
+
"classifier_activation": self.classifier_activation,
|
|
134
|
+
"num_layers": self.num_layers,
|
|
95
135
|
}
|
|
96
136
|
base_config = super(AutoClassifier, self).get_config()
|
|
97
137
|
return dict(list(base_config.items()) + list(config.items()))
|
|
@@ -103,6 +143,8 @@ class AutoClassifier(tf.keras.Model):
|
|
|
103
143
|
num_classes=config["num_classes"],
|
|
104
144
|
units=config["units"],
|
|
105
145
|
activation=config["activation"],
|
|
146
|
+
classifier_activation=config["classifier_activation"],
|
|
147
|
+
num_layers=config["num_layers"],
|
|
106
148
|
)
|
|
107
149
|
|
|
108
150
|
|
|
@@ -113,6 +155,7 @@ def call_existing_code(
|
|
|
113
155
|
optimizer: str,
|
|
114
156
|
input_shape_parm: None | int = None,
|
|
115
157
|
num_classes: None | int = None,
|
|
158
|
+
num_layers: int = 1,
|
|
116
159
|
) -> AutoClassifier:
|
|
117
160
|
"""
|
|
118
161
|
Calls an existing AutoClassifier instance.
|
|
@@ -142,6 +185,7 @@ def call_existing_code(
|
|
|
142
185
|
num_classes=num_classes,
|
|
143
186
|
units=units,
|
|
144
187
|
activation=activation,
|
|
188
|
+
num_layers=num_layers,
|
|
145
189
|
)
|
|
146
190
|
model.compile(
|
|
147
191
|
optimizer=optimizer,
|
|
@@ -151,7 +195,9 @@ def call_existing_code(
|
|
|
151
195
|
return model
|
|
152
196
|
|
|
153
197
|
|
|
154
|
-
def build_model(
|
|
198
|
+
def build_model(
|
|
199
|
+
hp, input_shape_parm: None | int, num_classes: None | int, **kwargs
|
|
200
|
+
) -> AutoClassifier:
|
|
155
201
|
"""Builds a neural network model using Keras Tuner's search algorithm.
|
|
156
202
|
|
|
157
203
|
Parameters
|
|
@@ -163,17 +209,51 @@ def build_model(hp, input_shape_parm: None | int, num_classes: None | int) -> Au
|
|
|
163
209
|
num_classes : `int`
|
|
164
210
|
The number of classes in the dataset.
|
|
165
211
|
|
|
212
|
+
Keyword Arguments:
|
|
213
|
+
----------
|
|
214
|
+
Additional keyword arguments to pass to the model.
|
|
215
|
+
|
|
216
|
+
hyperparameters : `dict`
|
|
217
|
+
The hyperparameters to set.
|
|
218
|
+
|
|
166
219
|
Returns
|
|
167
220
|
-------
|
|
168
221
|
`keras.Model`
|
|
169
222
|
The neural network model.
|
|
170
223
|
"""
|
|
171
|
-
|
|
172
|
-
|
|
224
|
+
hyperparameters = kwargs.get("hyperparameters", None)
|
|
225
|
+
hyperparameters_keys = hyperparameters.keys() if hyperparameters is not None else []
|
|
226
|
+
|
|
227
|
+
units = (
|
|
228
|
+
hp.Int(
|
|
229
|
+
"units",
|
|
230
|
+
min_value=int(input_shape_parm * 0.2),
|
|
231
|
+
max_value=int(input_shape_parm * 1.5),
|
|
232
|
+
step=2,
|
|
233
|
+
)
|
|
234
|
+
if "units" not in hyperparameters_keys
|
|
235
|
+
else hyperparameters["units"]
|
|
236
|
+
)
|
|
237
|
+
activation = (
|
|
238
|
+
hp.Choice("activation", ["sigmoid", "relu", "tanh", "selu", "softplus", "softsign"])
|
|
239
|
+
if "activation" not in hyperparameters_keys
|
|
240
|
+
else hyperparameters["activation"]
|
|
241
|
+
)
|
|
242
|
+
optimizer = (
|
|
243
|
+
hp.Choice("optimizer", ["sgd", "adam", "adadelta", "rmsprop", "adamax", "adagrad"])
|
|
244
|
+
if "optimizer" not in hyperparameters_keys
|
|
245
|
+
else hyperparameters["optimizer"]
|
|
246
|
+
)
|
|
247
|
+
threshold = (
|
|
248
|
+
hp.Float("threshold", min_value=0.1, max_value=0.9, sampling="log")
|
|
249
|
+
if "threshold" not in hyperparameters_keys
|
|
250
|
+
else hyperparameters["threshold"]
|
|
251
|
+
)
|
|
252
|
+
num_layers = (
|
|
253
|
+
hp.Int("num_layers", min_value=1, max_value=10, step=1)
|
|
254
|
+
if "num_layers" not in hyperparameters_keys
|
|
255
|
+
else hyperparameters["num_layers"]
|
|
173
256
|
)
|
|
174
|
-
activation = hp.Choice("activation", ["sigmoid", "relu", "tanh", "selu", "softplus"])
|
|
175
|
-
optimizer = hp.Choice("optimizer", ["sgd", "adam", "adadelta"])
|
|
176
|
-
threshold = hp.Float("threshold", min_value=0.1, max_value=0.9, sampling="log")
|
|
177
257
|
|
|
178
258
|
model = call_existing_code(
|
|
179
259
|
units=units,
|
|
@@ -182,10 +262,12 @@ def build_model(hp, input_shape_parm: None | int, num_classes: None | int) -> Au
|
|
|
182
262
|
optimizer=optimizer,
|
|
183
263
|
input_shape_parm=input_shape_parm,
|
|
184
264
|
num_classes=num_classes,
|
|
265
|
+
num_layers=num_layers,
|
|
185
266
|
)
|
|
186
267
|
return model
|
|
187
268
|
|
|
188
269
|
|
|
270
|
+
@suppress_warnings
|
|
189
271
|
def setup_model(
|
|
190
272
|
data: DataFrame,
|
|
191
273
|
target: str,
|
|
@@ -194,6 +276,7 @@ def setup_model(
|
|
|
194
276
|
seed=None,
|
|
195
277
|
train_mode: bool = True,
|
|
196
278
|
filepath: str = "./my_dir/best_model",
|
|
279
|
+
method: str = "Hyperband",
|
|
197
280
|
**kwargs,
|
|
198
281
|
) -> AutoClassifier:
|
|
199
282
|
"""Setup model for training and tuning.
|
|
@@ -214,6 +297,8 @@ def setup_model(
|
|
|
214
297
|
Whether to train the model or not.
|
|
215
298
|
filepath : `str`
|
|
216
299
|
The path to save the best model to.
|
|
300
|
+
method : `str`
|
|
301
|
+
The method to use for hyperparameter tuning. Options are "Hyperband" and "RandomSearch".
|
|
217
302
|
|
|
218
303
|
Keyword Arguments:
|
|
219
304
|
----------
|
|
@@ -229,30 +314,30 @@ def setup_model(
|
|
|
229
314
|
The objective to optimize.
|
|
230
315
|
verbose : `bool`
|
|
231
316
|
Whether to print verbose output.
|
|
317
|
+
hyperparameters : `dict`
|
|
318
|
+
The hyperparameters to set.
|
|
232
319
|
|
|
233
320
|
Returns
|
|
234
321
|
-------
|
|
235
322
|
model : `AutoClassifier`
|
|
236
323
|
The trained model.
|
|
237
324
|
"""
|
|
238
|
-
max_trials = kwargs
|
|
239
|
-
directory = kwargs
|
|
240
|
-
project_name = kwargs
|
|
241
|
-
objective = kwargs
|
|
242
|
-
verbose = kwargs
|
|
325
|
+
max_trials = kwargs.get("max_trials", 10)
|
|
326
|
+
directory = kwargs.get("directory", "./my_dir")
|
|
327
|
+
project_name = kwargs.get("project_name", "get_best")
|
|
328
|
+
objective = kwargs.get("objective", "val_loss")
|
|
329
|
+
verbose = kwargs.get("verbose", True)
|
|
330
|
+
hyperparameters = kwargs.get("hyperparameters", None)
|
|
243
331
|
|
|
244
332
|
X = data.drop(columns=target)
|
|
245
333
|
input_sample = X.sample(1)
|
|
246
334
|
y = data[target]
|
|
247
|
-
# Verify if there are categorical columns in the dataframe
|
|
248
335
|
assert (
|
|
249
336
|
X.select_dtypes(include=["object"]).empty == True
|
|
250
337
|
), "Categorical variables within the DataFrame must be encoded, this is done by using the DataFrameEncoder from likelihood."
|
|
251
338
|
validation_split = 1.0 - train_size
|
|
252
|
-
# Create my_dir path if it does not exist
|
|
253
339
|
|
|
254
340
|
if train_mode:
|
|
255
|
-
# Create a new directory if it does not exist
|
|
256
341
|
try:
|
|
257
342
|
if (not os.path.exists(directory)) and directory != "./":
|
|
258
343
|
os.makedirs(directory)
|
|
@@ -263,7 +348,6 @@ def setup_model(
|
|
|
263
348
|
except:
|
|
264
349
|
print("Warning: unable to create directory")
|
|
265
350
|
|
|
266
|
-
# Create a Classifier instance
|
|
267
351
|
y_encoder = OneHotEncoder()
|
|
268
352
|
y = y_encoder.encode(y.to_list())
|
|
269
353
|
X = X.to_numpy()
|
|
@@ -276,34 +360,239 @@ def setup_model(
|
|
|
276
360
|
num_classes = y.shape[1]
|
|
277
361
|
global build_model
|
|
278
362
|
build_model = partial(
|
|
279
|
-
build_model,
|
|
363
|
+
build_model,
|
|
364
|
+
input_shape_parm=input_shape_parm,
|
|
365
|
+
num_classes=num_classes,
|
|
366
|
+
hyperparameters=hyperparameters,
|
|
280
367
|
)
|
|
281
368
|
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
369
|
+
if method == "Hyperband":
|
|
370
|
+
tuner = keras_tuner.Hyperband(
|
|
371
|
+
hypermodel=build_model,
|
|
372
|
+
objective=objective,
|
|
373
|
+
max_epochs=epochs,
|
|
374
|
+
factor=3,
|
|
375
|
+
directory=directory,
|
|
376
|
+
project_name=project_name,
|
|
377
|
+
seed=seed,
|
|
378
|
+
)
|
|
379
|
+
elif method == "RandomSearch":
|
|
380
|
+
tuner = keras_tuner.RandomSearch(
|
|
381
|
+
hypermodel=build_model,
|
|
382
|
+
objective=objective,
|
|
383
|
+
max_trials=max_trials,
|
|
384
|
+
directory=directory,
|
|
385
|
+
project_name=project_name,
|
|
386
|
+
seed=seed,
|
|
387
|
+
)
|
|
388
|
+
|
|
389
|
+
tuner.search(X, y, epochs=epochs, validation_split=validation_split, verbose=verbose)
|
|
293
390
|
models = tuner.get_best_models(num_models=2)
|
|
294
391
|
best_model = models[0]
|
|
295
392
|
best_model(input_sample)
|
|
296
393
|
|
|
297
|
-
# save model
|
|
298
394
|
best_model.save(filepath, save_format="tf")
|
|
299
395
|
|
|
300
396
|
if verbose:
|
|
301
397
|
tuner.results_summary()
|
|
302
398
|
else:
|
|
303
|
-
# Load the best model from the directory
|
|
304
399
|
best_model = tf.keras.models.load_model(filepath)
|
|
305
400
|
|
|
306
|
-
|
|
401
|
+
best_hps = tuner.get_best_hyperparameters(1)[0].values
|
|
402
|
+
return best_model, pd.DataFrame(best_hps, index=["Value"])
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
class GetInsights:
    """Visual and statistical inspection helpers for a trained model.

    The model is expected to expose ``encoder``, ``decoder`` and ``classifier``
    sub-models (each with a ``layers`` list), as ``AutoClassifier`` does.

    Parameters
    ----------
    model : `AutoClassifier`
        The trained (already built) model to inspect.
    inputs : `np.ndarray`
        The 2-D data the model is analysed on (rows are samples).
    """

    def __init__(self, model: "AutoClassifier", inputs: np.ndarray) -> None:
        self.inputs = inputs
        self.model = model
        self.encoder_layer = self.model.encoder.layers[0]
        self.decoder_layer = self.model.decoder.layers[0]
        classifier_layers = self.model.classifier.layers
        # A classifier with a single layer (the default ``num_layers=1``) has no
        # ``layers[-2]``; fall back to its only layer instead of raising IndexError.
        self.classifier_layer = (
            classifier_layers[-2] if len(classifier_layers) > 1 else classifier_layers[-1]
        )
        # get_weights()[0] is the kernel matrix of each layer (index 1 would be the bias).
        self.encoder_weights = self.encoder_layer.get_weights()[0]
        self.decoder_weights = self.decoder_layer.get_weights()[0]
        self.classifier_weights = self.classifier_layer.get_weights()[0]

        # Build a shuffled palette of named colors, keeping only reasonably
        # saturated and bright ones so classes stay distinguishable.
        colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)
        by_hsv = sorted(
            (tuple(mcolors.rgb_to_hsv(mcolors.to_rgba(color)[:3])), name)
            for name, color in colors.items()
        )
        self.sorted_names = [name for hsv, name in by_hsv if hsv[1] > 0.4 and hsv[2] >= 0.4]
        random.shuffle(self.sorted_names)
        # Set by ``_viz_tsne_repr``; defined here so the attribute always exists.
        self.colors = None

    @staticmethod
    def _prepare_inputs(inputs: np.ndarray, frac=None) -> np.ndarray:
        """Return a NaN-free copy of ``inputs``, optionally sub-sampled by rows.

        Parameters
        ----------
        inputs : `np.ndarray`
            The data to prepare. It is never modified.
        frac : `float` or `None`
            If truthy, the fraction of rows of ``inputs`` to draw at random
            without replacement.

        Returns
        -------
        `np.ndarray`
            A new array with NaN entries replaced by ``0.0``.
        """
        prepared = np.array(inputs, copy=True)
        if frac:
            # Size the sample from the array actually being sampled.
            n = int(frac * prepared.shape[0])
            indexes = np.random.choice(np.arange(prepared.shape[0]), n, replace=False)
            prepared = prepared[indexes]
        prepared[np.isnan(prepared)] = 0.0
        return prepared

    def predictor_analyzer(
        self,
        frac=None,
        cmap: str = "viridis",
        aspect: str = "auto",
        highlight: bool = True,
        **kwargs,
    ) -> DataFrame:
        """Plot the model's behaviour on the inputs and summarise it per predicted class.

        Shows, in order: the encoder weights, the (sampled) inputs next to their
        reconstruction, a t-SNE plot of the latent space and two radviz plots
        (latent features and input features).

        Parameters
        ----------
        frac : `float` or `None`
            Fraction of the rows of ``self.inputs`` to analyse. ``None`` uses all rows.
        cmap : `str`
            Colormap for the image plots.
        aspect : `str`
            Aspect passed to ``plt.imshow``.
        highlight : `bool`
            Whether to highlight the largest weight of each row in the weights plot.

        Keyword Arguments:
        ----------
        y_labels : sequence of `str`
            Names of the input features; used as tick labels and column names.
        Remaining keyword arguments are forwarded to ``_viz_weights``.

        Returns
        -------
        `DataFrame`
            Per-class statistics of the input features (see ``_statistics``).
        """
        self._viz_weights(cmap=cmap, aspect=aspect, highlight=highlight, **kwargs)
        y_labels = kwargs.get("y_labels", None)
        inputs = self._prepare_inputs(self.inputs, frac)

        encoded = self.model.encoder(inputs)
        reconstructed = self.model.decoder(encoded)
        combined = tf.concat([reconstructed, encoded], axis=1)
        self.classification = self.model.classifier(combined).numpy().argmax(axis=1)

        ax = plt.subplot(1, 2, 1)
        # Show the same rows that were reconstructed so both panels are comparable.
        plt.imshow(inputs, cmap=cmap, aspect=aspect)
        plt.colorbar()
        plt.title("Original Data")
        plt.subplot(1, 2, 2, sharex=ax, sharey=ax)
        plt.imshow(reconstructed, cmap=cmap, aspect=aspect)
        plt.colorbar()
        plt.title("Decoder Layer Reconstruction")
        plt.show()

        # ``inputs`` is already sampled; sampling again would permute the rows and
        # break the row-by-row correspondence with ``self.classification``.
        self._get_tsne_repr(inputs=inputs)
        self._viz_tsne_repr(c=self.classification)

        self.data = pd.DataFrame(
            np.asarray(encoded), columns=[f"Feature {i}" for i in range(encoded.shape[1])]
        )
        self.data_input = pd.DataFrame(
            inputs,
            columns=(
                [f"Feature {i}" for i in range(inputs.shape[1])] if y_labels is None else y_labels
            ),
        )
        self.data["class"] = self.classification
        self.data_input["class"] = self.classification
        radviz(self.data, "class", color=self.colors)
        plt.title("Radviz Visualization of Latent Space")
        plt.show()

        radviz(self.data_input, "class", color=self.colors)
        plt.title("Radviz Visualization of Input Data")
        plt.show()
        return self._statistics(self.data_input)

    def _statistics(self, data_input: DataFrame, **kwargs) -> DataFrame:
        """Compute per-class descriptive statistics.

        Parameters
        ----------
        data_input : `DataFrame`
            Feature columns plus a ``"class"`` column. It is not modified.

        Returns
        -------
        `DataFrame`
            One column per class and one row per ``<feature>_<statistic>``
            (mean, min, max, std, median and mode).
        """
        data = data_input.copy(deep=True)

        if not pd.api.types.is_string_dtype(data["class"]):
            data["class"] = data["class"].astype(str)

        data.ffill(inplace=True)
        grouped_data = data.groupby("class")

        numerical_stats = grouped_data.agg(["mean", "min", "max", "std", "median"])
        # Flatten the (feature, statistic) MultiIndex into "feature_statistic".
        numerical_stats.columns = ["_".join(col).strip() for col in numerical_stats.columns.values]

        def get_mode(x):
            # ``mode`` may return several rows on ties; keep the first one.
            mode_series = x.mode()
            return mode_series.iloc[0] if not mode_series.empty else None

        mode_stats = grouped_data.apply(get_mode, include_groups=False)
        mode_stats.columns = [f"{col}_mode" for col in mode_stats.columns]
        combined_stats = pd.concat([numerical_stats, mode_stats], axis=1)

        return combined_stats.T

    def _viz_weights(
        self, cmap: str = "viridis", aspect: str = "auto", highlight: bool = True, **kwargs
    ) -> None:
        """Plot the kernel of the first encoder layer as an image.

        Parameters
        ----------
        cmap : `str`
            Colormap for the weights.
        aspect : `str`
            Aspect passed to ``plt.imshow``.
        highlight : `bool`
            If true, overlay the largest weight of every row.

        Keyword Arguments:
        ----------
        title : `str`
            Plot title. Default is "Encoder Layer Weights (Dense Layer)".
        y_labels : sequence of `str`
            Tick labels for the rows of the weight matrix.
        cmap_highlight : `str`
            Colormap of the highlight overlay. Default is "Pastel1".
        """
        title = kwargs.get("title", "Encoder Layer Weights (Dense Layer)")
        y_labels = kwargs.get("y_labels", None)
        cmap_highlight = kwargs.get("cmap_highlight", "Pastel1")

        plt.imshow(self.encoder_weights, cmap=cmap, aspect=aspect)
        plt.colorbar()
        plt.title(title)
        if y_labels is not None:
            plt.yticks(ticks=np.arange(self.encoder_weights.shape[0]), labels=y_labels)
        if highlight:
            highlight_mask = np.zeros_like(self.encoder_weights, dtype=bool)
            rows = np.arange(self.encoder_weights.shape[0])
            highlight_mask[rows, self.encoder_weights.argmax(axis=1)] = True
            # Everything except the per-row maximum is masked out of the overlay.
            plt.imshow(
                np.ma.masked_where(~highlight_mask, self.encoder_weights),
                cmap=cmap_highlight,
                alpha=0.5,
                aspect=aspect,
            )
        plt.show()

    def _get_tsne_repr(self, inputs=None, frac=None) -> None:
        """Project the inputs with the encoder kernel and embed the result with t-SNE.

        The latent representation is ``inputs @ encoder_weights`` (kernel of the
        first encoder layer only). Results are stored in
        ``self.latent_representations`` and ``self.reduced_data_tsne``.

        Parameters
        ----------
        inputs : `np.ndarray` or `None`
            Data to embed. Defaults to ``self.inputs``. It is not modified.
        frac : `float` or `None`
            Optional fraction of the rows of ``inputs`` to sample at random.
        """
        source = self.inputs if inputs is None else inputs
        prepared = self._prepare_inputs(source, frac)
        self.latent_representations = prepared @ self.encoder_weights

        tsne = TSNE(n_components=2)
        self.reduced_data_tsne = tsne.fit_transform(self.latent_representations)

    def _viz_tsne_repr(self, **kwargs) -> None:
        """Scatter-plot the t-SNE embedding computed by ``_get_tsne_repr``.

        Keyword Arguments:
        ----------
        c : array-like
            Class value of every embedded point; enables per-class colors and a colorbar.
        colors : `list`
            Colors to use, one per class. Defaults to the first entries of
            ``self.sorted_names``.
        """
        c = kwargs.get("c", None)
        classes = np.unique(c) if c is not None else None
        self.colors = (
            kwargs.get("colors", self.sorted_names[: len(classes)]) if c is not None else None
        )
        plt.scatter(
            self.reduced_data_tsne[:, 0],
            self.reduced_data_tsne[:, 1],
            cmap=matplotlib.colors.ListedColormap(self.colors) if c is not None else None,
            c=c,
        )
        if c is not None:
            cb = plt.colorbar()
            # linspace yields exactly one tick per class label and, unlike arange
            # with a float step, cannot fail when the largest class value is 0.
            loc = np.linspace(0, np.max(c), len(classes), endpoint=False)
            cb.set_ticks(loc)
            cb.set_ticklabels(classes)
        plt.title("t-SNE Visualization of Latent Space")
        plt.xlabel("t-SNE 1")
        plt.ylabel("t-SNE 2")
        plt.show()
|
|
307
554
|
|
|
308
555
|
|
|
309
556
|
########################################################################################
|
|
557
|
+
|
|
558
|
+
if __name__ == "__main__":
    # Example usage: train an AutoClassifier on the iris dataset and inspect it.
    from sklearn.datasets import load_iris

    # Imported under an alias so the module-level ``OneHotEncoder`` from
    # ``likelihood.tools`` (used by ``setup_model``) is not rebound.
    from sklearn.preprocessing import OneHotEncoder as SklearnOneHotEncoder

    # Load the dataset
    iris = load_iris()

    # Convert to a DataFrame for easy exploration
    iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
    iris_df["species"] = iris.target

    X = iris_df.drop(columns="species")
    y_labels = X.columns
    X = X.values
    y = iris_df["species"].values

    X = np.asarray(X).astype(np.float32)

    # One-hot encode the integer targets for CategoricalCrossentropy.
    encoder = SklearnOneHotEncoder()
    y = encoder.fit_transform(y.reshape(-1, 1)).toarray()
    y = np.asarray(y).astype(np.float32)

    model = AutoClassifier(
        input_shape_parm=X.shape[1], num_classes=3, units=27, activation="selu", num_layers=2
    )
    model.compile(
        optimizer="adam",
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=[tf.keras.metrics.F1Score(threshold=0.5)],
    )
    model.fit(X, y, epochs=50, validation_split=0.2)

    insights = GetInsights(model, X)
    summary = insights.predictor_analyzer(frac=1.0, y_labels=y_labels)
    # Recompute the embedding on all rows in their original order so it lines
    # up with the true species labels plotted below.
    insights._get_tsne_repr()
    insights._viz_tsne_repr()
    insights._viz_tsne_repr(c=iris_df["species"])
    insights._viz_weights()
    print(summary)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: likelihood
|
|
3
|
-
Version: 1.2.24
|
|
3
|
+
Version: 1.2.25
|
|
4
4
|
Summary: A package that performs the maximum likelihood algorithm.
|
|
5
5
|
Home-page: https://github.com/jzsmoreno/likelihood/
|
|
6
6
|
Author: J. A. Moreno-Guerra
|
|
@@ -13,7 +13,7 @@ Classifier: Operating System :: OS Independent
|
|
|
13
13
|
Requires-Python: >=3.10
|
|
14
14
|
Description-Content-Type: text/markdown
|
|
15
15
|
License-File: LICENSE
|
|
16
|
-
Requires-Dist: black[jupyter]
|
|
16
|
+
Requires-Dist: black[jupyter]>=24.3.0
|
|
17
17
|
Requires-Dist: mypy-extensions==1.0.0
|
|
18
18
|
Requires-Dist: types-openpyxl==3.1.0.15
|
|
19
19
|
Requires-Dist: pydocstyle==6.3.0
|
|
@@ -31,6 +31,18 @@ Requires-Dist: pyvis; extra == "full"
|
|
|
31
31
|
Requires-Dist: tensorflow==2.15.0; extra == "full"
|
|
32
32
|
Requires-Dist: keras-tuner; extra == "full"
|
|
33
33
|
Requires-Dist: scikit-learn; extra == "full"
|
|
34
|
+
Dynamic: author
|
|
35
|
+
Dynamic: author-email
|
|
36
|
+
Dynamic: classifier
|
|
37
|
+
Dynamic: description
|
|
38
|
+
Dynamic: description-content-type
|
|
39
|
+
Dynamic: home-page
|
|
40
|
+
Dynamic: maintainer
|
|
41
|
+
Dynamic: maintainer-email
|
|
42
|
+
Dynamic: provides-extra
|
|
43
|
+
Dynamic: requires-dist
|
|
44
|
+
Dynamic: requires-python
|
|
45
|
+
Dynamic: summary
|
|
34
46
|
|
|
35
47
|

|
|
36
48
|
|
|
@@ -2,19 +2,19 @@ likelihood/__init__.py,sha256=5C0hapdsk85XZhN_rssRAEFpkRRuKNtj6cyRbqD2_gM,994
|
|
|
2
2
|
likelihood/main.py,sha256=fcCkGOOWKjfvw2tLVqjuKPV8t0rVCIT9FlbYcOv4EYo,7974
|
|
3
3
|
likelihood/graph/__init__.py,sha256=6TuFDfmXTwpLyHl7_KqBfdzW6zqHjGzIFvymjFPlvjI,21
|
|
4
4
|
likelihood/graph/graph.py,sha256=hGWCznxaRQ8BfY2aLjrvwriZkAIsz5ydKXF4x_7b0EQ,3359
|
|
5
|
-
likelihood/graph/nn.py,sha256=
|
|
5
|
+
likelihood/graph/nn.py,sha256=WuK66hRTN5hdVIArgfSweqtE098tb6QFd2ZMFaHvnZA,12263
|
|
6
6
|
likelihood/models/__init__.py,sha256=e6nB4w47w0Q9DrAFeP3OcUgcoHOtf7Il4mBhgf4AARg,52
|
|
7
7
|
likelihood/models/hmm.py,sha256=0s0gFySH1u4NjRaZDxiZ8oeTaFhFrw1x0GJxwy3dFrA,6253
|
|
8
8
|
likelihood/models/regression.py,sha256=9cakyGlJCEO6WfpoKLh3GxdXQeQp7cUvJIkQ5odT0TA,9404
|
|
9
9
|
likelihood/models/simulation.py,sha256=L_9Mihcca7i_AnvWWrZilFV8VEhz_Z8fDLepmwBGSi8,8832
|
|
10
10
|
likelihood/models/utils.py,sha256=VtEj07lV-GRoWraQgpfjU0jTt1Ntf9MXgYwe6XYQh20,1552
|
|
11
11
|
likelihood/models/deep/__init__.py,sha256=-KIPippVaMqgG8mEgYjNxYQdqOUcFhUuKhbVe8TTCfo,28
|
|
12
|
-
likelihood/models/deep/autoencoders.py,sha256=
|
|
12
|
+
likelihood/models/deep/autoencoders.py,sha256=seE1rb1t1gbbKRyEzfi01BqMsV4MU6yakVTLcukAMkg,20591
|
|
13
13
|
likelihood/tools/__init__.py,sha256=MCjsCWfBNKE2uMN0VizDN1uFzZ_md0X2WZeBdWhrCR8,50
|
|
14
14
|
likelihood/tools/numeric_tools.py,sha256=FA44kbiAcxcquz1el_g3Pqsp5ii8XFkAIrsMs5bGkj0,11445
|
|
15
15
|
likelihood/tools/tools.py,sha256=iZBC7IHTFpAyxooyel7ZFi-5-G0nCotNLLtxenPw9T8,44303
|
|
16
|
-
likelihood-1.2.
|
|
17
|
-
likelihood-1.2.
|
|
18
|
-
likelihood-1.2.
|
|
19
|
-
likelihood-1.2.
|
|
20
|
-
likelihood-1.2.
|
|
16
|
+
likelihood-1.2.25.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
|
|
17
|
+
likelihood-1.2.25.dist-info/METADATA,sha256=hUsmkghXP8m4z3FtWcM64gwBEW74HIOTNJifK26OOkw,2771
|
|
18
|
+
likelihood-1.2.25.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
19
|
+
likelihood-1.2.25.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
|
|
20
|
+
likelihood-1.2.25.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|