likelihood 1.2.24__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- likelihood/graph/graph.py +3 -3
- likelihood/graph/nn.py +6 -8
- likelihood/models/deep/autoencoders.py +344 -36
- likelihood/models/simulation.py +0 -6
- likelihood/models/utils.py +3 -11
- {likelihood-1.2.24.dist-info → likelihood-1.3.0.dist-info}/METADATA +15 -3
- {likelihood-1.2.24.dist-info → likelihood-1.3.0.dist-info}/RECORD +10 -10
- {likelihood-1.2.24.dist-info → likelihood-1.3.0.dist-info}/WHEEL +1 -1
- {likelihood-1.2.24.dist-info → likelihood-1.3.0.dist-info}/LICENSE +0 -0
- {likelihood-1.2.24.dist-info → likelihood-1.3.0.dist-info}/top_level.txt +0 -0
likelihood/graph/graph.py
CHANGED
@@ -45,8 +45,8 @@ class DynamicGraph(FeatureSelection):
 
     def draw(self, name="graph.html", **kwargs) -> None:
        """Display the network using HTML format"""
-        spring_length = kwargs
-        node_distance = kwargs
+        spring_length = kwargs.get("spring_length", 500)
+        node_distance = kwargs.get("node_distance", 100)
         self.G.repulsion(node_distance=node_distance, spring_length=spring_length)
         self.G.show_buttons(filter_=["physics"])
         self.G.show(name)
@@ -89,5 +89,5 @@ if __name__ == "__main__":
     df["y"] = y
     # Instantiate DynamicGraph
     fs = DynamicGraph(df, n_importances=2)
-
+    fs.fit()
     fs.draw()
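
The `draw` change above swaps raw `kwargs` lookups for `kwargs.get` with defaults, so both layout parameters are now optional. A minimal usage sketch, reusing the fitted `fs` instance from the module's `__main__` block (override values are illustrative):

```python
# Defaults from the diff: spring_length=500, node_distance=100
fs.draw()

# Hypothetical overrides for a tighter layout
fs.draw(name="tight.html", spring_length=250, node_distance=50)
```
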
likelihood/graph/nn.py
CHANGED
@@ -1,9 +1,9 @@
+import logging
 import os
 
-os.environ["
-
-
-import logging
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+logging.getLogger("tensorflow").setLevel(logging.ERROR)
+
 import warnings
 from typing import List, Tuple
 
@@ -17,9 +17,7 @@ from sklearn.model_selection import train_test_split
 
 from likelihood.tools import generate_feature_yaml
 
-
-
-tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+tf.get_logger().setLevel("ERROR")
 
 
 def compare_similarity(arr1: np.ndarray, arr2: np.ndarray) -> int:
@@ -98,7 +96,7 @@ def cal_adjacency_matrix(
 
     assert len(df_categorical) > 0
 
-    similarity = kwargs
+    similarity = kwargs.get("similarity", len(df_categorical.columns) - 1)
     assert similarity <= df_categorical.shape[1]
 
     adj_dict = {}
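
With the `kwargs.get` change above, `similarity` now defaults to one less than the number of categorical columns, and the existing assertion still caps any override at `df_categorical.shape[1]`. A hedged sketch (the DataFrame, column names, and import path are illustrative; the function's remaining parameters are elided in this diff):

```python
import pandas as pd
from likelihood.graph.nn import cal_adjacency_matrix  # assumed import path

df = pd.DataFrame({"color": ["red", "red", "blue"], "size": ["S", "M", "S"]})

result = cal_adjacency_matrix(df)                # default similarity: 2 columns - 1 = 1
result = cal_adjacency_matrix(df, similarity=2)  # explicit, must be <= number of columns
```
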

likelihood/models/deep/autoencoders.py
CHANGED

@@ -1,19 +1,40 @@
 import logging
 import os
+import random
 from functools import partial
 from shutil import rmtree
 
-import 
+import matplotlib
+import matplotlib.colors as mcolors
+import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+from pandas.plotting import radviz
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+logging.getLogger("tensorflow").setLevel(logging.ERROR)
+
+import warnings
+from functools import wraps
+
+import keras_tuner
 import tensorflow as tf
 from pandas.core.frame import DataFrame
+from sklearn.manifold import TSNE
 
 from likelihood.tools import OneHotEncoder
 
-
+tf.get_logger().setLevel("ERROR")
+
+
+def suppress_warnings(func):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            return func(*args, **kwargs)
 
-
+    return wrapper
 
 
 @tf.keras.utils.register_keras_serializable(package="Custom", name="AutoClassifier")
@@ -35,7 +56,7 @@ class AutoClassifier(tf.keras.Model):
     from_config(cls, config): Recreates an instance of AutoClassifier from its configuration.
     """
 
-    def __init__(self, input_shape_parm, num_classes, units, activation):
+    def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
         """
         Initializes an AutoClassifier instance with the given parameters.
 
@@ -49,6 +70,17 @@ class AutoClassifier(tf.keras.Model):
             The number of neurons in each hidden layer.
         activation : `str`
             The type of activation function to use for the neural network layers.
+
+        Keyword Arguments:
+        ----------
+        Additional keyword arguments to pass to the model.
+
+        classifier_activation : `str`
+            The activation function to use for the classifier layer. Default is "softmax". If the activation function is not a classification function, the model can be used in regression problems.
+        num_layers : `int`
+            The number of hidden layers in the classifier. Default is 1.
+        dropout : `float`
+            The dropout rate to use in the classifier. Default is None.
         """
         super(AutoClassifier, self).__init__()
         self.input_shape_parm = input_shape_parm
@@ -59,6 +91,9 @@ class AutoClassifier(tf.keras.Model):
         self.encoder = None
         self.decoder = None
         self.classifier = None
+        self.classifier_activation = kwargs.get("classifier_activation", "softmax")
+        self.num_layers = kwargs.get("num_layers", 1)
+        self.dropout = kwargs.get("dropout", None)
 
     def build(self, input_shape):
         self.encoder = tf.keras.Sequential(
@@ -75,8 +110,16 @@ class AutoClassifier(tf.keras.Model):
             ]
         )
 
-        self.classifier = tf.keras.Sequential(
-
+        self.classifier = tf.keras.Sequential()
+        if self.num_layers > 1:
+            for _ in range(self.num_layers - 1):
+                self.classifier.add(
+                    tf.keras.layers.Dense(units=self.units, activation=self.activation)
+                )
+                if self.dropout:
+                    self.classifier.add(tf.keras.layers.Dropout(self.dropout))
+        self.classifier.add(
+            tf.keras.layers.Dense(units=self.num_classes, activation=self.classifier_activation)
         )
 
     def call(self, x):
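
For concreteness, with illustrative values `units=16`, `num_classes=3`, `activation="relu"`, `num_layers=3`, `dropout=0.2`, and the default `classifier_activation="softmax"`, the loop in `build` above assembles a head equivalent to this plain Keras sketch:

```python
import tensorflow as tf

# num_layers - 1 = 2 hidden blocks, each Dense followed by Dropout, then the output layer
classifier = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(units=16, activation="relu"),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(units=16, activation="relu"),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(units=3, activation="softmax"),
    ]
)
```

With the default `num_layers=1` the loop body never runs and the head reduces to the single output layer.
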
@@ -92,6 +135,9 @@ class AutoClassifier(tf.keras.Model):
             "num_classes": self.num_classes,
             "units": self.units,
             "activation": self.activation,
+            "classifier_activation": self.classifier_activation,
+            "num_layers": self.num_layers,
+            "dropout": self.dropout,
         }
         base_config = super(AutoClassifier, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
@@ -103,6 +149,9 @@ class AutoClassifier(tf.keras.Model):
             num_classes=config["num_classes"],
             units=config["units"],
             activation=config["activation"],
+            classifier_activation=config["classifier_activation"],
+            num_layers=config["num_layers"],
+            dropout=config["dropout"],
         )
 
 
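
Since the three new arguments are written out by `get_config` and read back by `from_config`, a saved model should round-trip them. A sketch assuming `model` is a trained `AutoClassifier` (the path reuses `setup_model`'s default `filepath`):

```python
import tensorflow as tf

model.save("./my_dir/best_model", save_format="tf")
restored = tf.keras.models.load_model("./my_dir/best_model")  # class resolved via the
                                                              # register_keras_serializable decorator
assert restored.num_layers == model.num_layers
assert restored.dropout == model.dropout
```
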
@@ -113,6 +162,8 @@ def call_existing_code(
     optimizer: str,
     input_shape_parm: None | int = None,
     num_classes: None | int = None,
+    num_layers: int = 1,
+    **kwargs,
 ) -> AutoClassifier:
     """
     Calls an existing AutoClassifier instance.
@@ -137,11 +188,14 @@ def call_existing_code(
     `AutoClassifier`
         The AutoClassifier instance.
     """
+    dropout = kwargs.get("dropout", None)
     model = AutoClassifier(
         input_shape_parm=input_shape_parm,
         num_classes=num_classes,
         units=units,
         activation=activation,
+        num_layers=num_layers,
+        dropout=dropout,
     )
     model.compile(
         optimizer=optimizer,
@@ -151,7 +205,9 @@ def call_existing_code(
     return model
 
 
-def build_model(
+def build_model(
+    hp, input_shape_parm: None | int, num_classes: None | int, **kwargs
+) -> AutoClassifier:
     """Builds a neural network model using Keras Tuner's search algorithm.
 
     Parameters
@@ -163,17 +219,56 @@ def build_model(hp, input_shape_parm: None | int, num_classes: None | int) -> AutoClassifier:
     num_classes : `int`
         The number of classes in the dataset.
 
+    Keyword Arguments:
+    ----------
+    Additional keyword arguments to pass to the model.
+
+    hyperparameters : `dict`
+        The hyperparameters to set.
+
     Returns
     -------
     `keras.Model`
         The neural network model.
     """
-
-
+    hyperparameters = kwargs.get("hyperparameters", None)
+    hyperparameters_keys = hyperparameters.keys() if hyperparameters is not None else []
+
+    units = (
+        hp.Int(
+            "units",
+            min_value=int(input_shape_parm * 0.2),
+            max_value=int(input_shape_parm * 1.5),
+            step=2,
+        )
+        if "units" not in hyperparameters_keys
+        else hyperparameters["units"]
+    )
+    activation = (
+        hp.Choice("activation", ["sigmoid", "relu", "tanh", "selu", "softplus", "softsign"])
+        if "activation" not in hyperparameters_keys
+        else hyperparameters["activation"]
+    )
+    optimizer = (
+        hp.Choice("optimizer", ["sgd", "adam", "adadelta", "rmsprop", "adamax", "adagrad"])
+        if "optimizer" not in hyperparameters_keys
+        else hyperparameters["optimizer"]
+    )
+    threshold = (
+        hp.Float("threshold", min_value=0.1, max_value=0.9, sampling="log")
+        if "threshold" not in hyperparameters_keys
+        else hyperparameters["threshold"]
+    )
+    num_layers = (
+        hp.Int("num_layers", min_value=1, max_value=10, step=1)
+        if "num_layers" not in hyperparameters_keys
+        else hyperparameters["num_layers"]
+    )
+    dropout = (
+        hp.Float("dropout", min_value=0.1, max_value=0.9, sampling="log")
+        if "dropout" not in hyperparameters_keys
+        else hyperparameters["dropout"]
     )
-    activation = hp.Choice("activation", ["sigmoid", "relu", "tanh", "selu", "softplus"])
-    optimizer = hp.Choice("optimizer", ["sgd", "adam", "adadelta"])
-    threshold = hp.Float("threshold", min_value=0.1, max_value=0.9, sampling="log")
 
     model = call_existing_code(
         units=units,
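
Each search-space expression above is bypassed when the new `hyperparameters` dict pins that key, so fixed and tuned values can be mixed in a single search. A hypothetical Keras Tuner sketch (`input_shape_parm` and `num_classes` values are illustrative):

```python
import keras_tuner

fixed = {"activation": "relu", "dropout": 0.2}  # pinned; units, optimizer, etc. stay tunable

tuner = keras_tuner.RandomSearch(
    hypermodel=lambda hp: build_model(
        hp, input_shape_parm=30, num_classes=2, hyperparameters=fixed
    ),
    objective="val_loss",
    max_trials=5,
)
```
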
@@ -182,10 +277,13 @@ def build_model(hp, input_shape_parm: None | int, num_classes: None | int) -> AutoClassifier:
         optimizer=optimizer,
         input_shape_parm=input_shape_parm,
         num_classes=num_classes,
+        num_layers=num_layers,
+        dropout=dropout,
     )
     return model
 
 
+@suppress_warnings
 def setup_model(
     data: DataFrame,
     target: str,
@@ -194,6 +292,7 @@ def setup_model(
     seed=None,
     train_mode: bool = True,
     filepath: str = "./my_dir/best_model",
+    method: str = "Hyperband",
     **kwargs,
 ) -> AutoClassifier:
     """Setup model for training and tuning.
@@ -214,6 +313,8 @@ def setup_model(
         Whether to train the model or not.
     filepath : `str`
         The path to save the best model to.
+    method : `str`
+        The method to use for hyperparameter tuning. Options are "Hyperband" and "RandomSearch".
 
     Keyword Arguments:
     ----------
@@ -229,30 +330,30 @@ def setup_model(
         The objective to optimize.
     verbose : `bool`
         Whether to print verbose output.
+    hyperparameters : `dict`
+        The hyperparameters to set.
 
     Returns
     -------
     model : `AutoClassifier`
         The trained model.
     """
-    max_trials = kwargs
-    directory = kwargs
-    project_name = kwargs
-    objective = kwargs
-    verbose = kwargs
+    max_trials = kwargs.get("max_trials", 10)
+    directory = kwargs.get("directory", "./my_dir")
+    project_name = kwargs.get("project_name", "get_best")
+    objective = kwargs.get("objective", "val_loss")
+    verbose = kwargs.get("verbose", True)
+    hyperparameters = kwargs.get("hyperparameters", None)
 
     X = data.drop(columns=target)
     input_sample = X.sample(1)
     y = data[target]
-    # Verify if there are categorical columns in the dataframe
     assert (
         X.select_dtypes(include=["object"]).empty == True
     ), "Categorical variables within the DataFrame must be encoded, this is done by using the DataFrameEncoder from likelihood."
     validation_split = 1.0 - train_size
-    # Create my_dir path if it does not exist
 
     if train_mode:
-        # Create a new directory if it does not exist
         try:
             if (not os.path.exists(directory)) and directory != "./":
                 os.makedirs(directory)
@@ -263,7 +364,6 @@ def setup_model(
         except:
             print("Warning: unable to create directory")
 
-        # Create a Classifier instance
         y_encoder = OneHotEncoder()
         y = y_encoder.encode(y.to_list())
         X = X.to_numpy()
@@ -276,34 +376,242 @@ def setup_model(
         num_classes = y.shape[1]
         global build_model
         build_model = partial(
-            build_model,
-
-
-
-        tuner = keras_tuner.RandomSearch(
-            hypermodel=build_model,
-            objective=objective,
-            max_trials=max_trials,
-            directory=directory,
-            project_name=project_name,
-            seed=seed,
+            build_model,
+            input_shape_parm=input_shape_parm,
+            num_classes=num_classes,
+            hyperparameters=hyperparameters,
         )
 
-
+        if method == "Hyperband":
+            tuner = keras_tuner.Hyperband(
+                hypermodel=build_model,
+                objective=objective,
+                max_epochs=epochs,
+                factor=3,
+                directory=directory,
+                project_name=project_name,
+                seed=seed,
+            )
+        elif method == "RandomSearch":
+            tuner = keras_tuner.RandomSearch(
+                hypermodel=build_model,
+                objective=objective,
+                max_trials=max_trials,
+                directory=directory,
+                project_name=project_name,
+                seed=seed,
+            )
+
+        tuner.search(X, y, epochs=epochs, validation_split=validation_split, verbose=verbose)
         models = tuner.get_best_models(num_models=2)
         best_model = models[0]
         best_model(input_sample)
 
-        # save model
         best_model.save(filepath, save_format="tf")
 
         if verbose:
             tuner.results_summary()
     else:
-        # Load the best model from the directory
         best_model = tf.keras.models.load_model(filepath)
 
-
+    best_hps = tuner.get_best_hyperparameters(1)[0].values
+    return best_model, pd.DataFrame(best_hps, index=["Value"])
+
+
+class GetInsights:
+    def __init__(self, model: AutoClassifier, inputs: np.ndarray) -> None:
+        self.inputs = inputs
+        self.model = model
+        self.encoder_layer = self.model.encoder.layers[0]
+        self.decoder_layer = self.model.decoder.layers[0]
+        self.encoder_weights = self.encoder_layer.get_weights()[0]
+        self.decoder_weights = self.decoder_layer.get_weights()[0]
+        colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)
+
+        by_hsv = sorted(
+            (tuple(mcolors.rgb_to_hsv(mcolors.to_rgba(color)[:3])), name)
+            for name, color in colors.items()
+        )
+        self.sorted_names = [name for hsv, name in by_hsv if hsv[1] > 0.4 and hsv[2] >= 0.4]
+        random.shuffle(self.sorted_names)
+
+    def predictor_analyzer(
+        self,
+        frac=None,
+        cmap: str = "viridis",
+        aspect: str = "auto",
+        highlight: bool = True,
+        **kwargs,
+    ) -> None:
+        self._viz_weights(cmap=cmap, aspect=aspect, highlight=highlight, **kwargs)
+        inputs = self.inputs.copy()
+        y_labels = kwargs.get("y_labels", None)
+        if frac:
+            n = int(frac * self.inputs.shape[0])
+            indexes = np.random.choice(np.arange(inputs.shape[0]), n, replace=False)
+            inputs = inputs[indexes]
+        inputs[np.isnan(inputs)] = 0.0
+        encoded = self.model.encoder(inputs)
+        reconstructed = self.model.decoder(encoded)
+        combined = tf.concat([reconstructed, encoded], axis=1)
+        self.classification = self.model.classifier(combined).numpy().argmax(axis=1)
+        ax = plt.subplot(1, 2, 1)
+        plt.imshow(self.inputs, cmap=cmap, aspect=aspect)
+        plt.colorbar()
+        plt.title("Original Data")
+        plt.subplot(1, 2, 2, sharex=ax, sharey=ax)
+        plt.imshow(reconstructed, cmap=cmap, aspect=aspect)
+        plt.colorbar()
+        plt.title("Decoder Layer Reconstruction")
+        plt.show()
+
+        self._get_tsne_repr(inputs=inputs, frac=frac)
+        self._viz_tsne_repr(c=self.classification)
+
+        self.data = pd.DataFrame(encoded, columns=[f"Feature {i}" for i in range(encoded.shape[1])])
+        self.data_input = pd.DataFrame(
+            inputs,
+            columns=(
+                [f"Feature {i}" for i in range(inputs.shape[1])] if y_labels is None else y_labels
+            ),
+        )
+        self.data["class"] = self.classification
+        self.data_input["class"] = self.classification
+        radviz(self.data, "class", color=self.colors)
+        plt.title("Radviz Visualization of Latent Space")
+        plt.show()
+
+        radviz(self.data_input, "class", color=self.colors)
+        plt.title("Radviz Visualization of Input Data")
+        plt.show()
+        return self._statistics(self.data_input)
+
+    def _statistics(self, data_input: DataFrame, **kwargs) -> DataFrame:
+        data = data_input.copy(deep=True)
+
+        if not pd.api.types.is_string_dtype(data["class"]):
+            data["class"] = data["class"].astype(str)
+
+        data.ffill(inplace=True)
+        grouped_data = data.groupby("class")
+
+        numerical_stats = grouped_data.agg(["mean", "min", "max", "std", "median"])
+        numerical_stats.columns = ["_".join(col).strip() for col in numerical_stats.columns.values]
+
+        def get_mode(x):
+            mode_series = x.mode()
+            return mode_series.iloc[0] if not mode_series.empty else None
+
+        mode_stats = grouped_data.apply(get_mode, include_groups=False)
+        mode_stats.columns = [f"{col}_mode" for col in mode_stats.columns]
+        combined_stats = pd.concat([numerical_stats, mode_stats], axis=1)
+
+        return combined_stats.T
+
+    def _viz_weights(
+        self, cmap: str = "viridis", aspect: str = "auto", highlight: bool = True, **kwargs
+    ) -> None:
+        title = kwargs.get("title", "Encoder Layer Weights (Dense Layer)")
+        y_labels = kwargs.get("y_labels", None)
+        cmap_highlight = kwargs.get("cmap_highlight", "Pastel1")
+        highlight_mask = np.zeros_like(self.encoder_weights, dtype=bool)
+
+        plt.imshow(self.encoder_weights, cmap=cmap, aspect=aspect)
+        plt.colorbar()
+        plt.title(title)
+        if y_labels is not None:
+            plt.yticks(ticks=np.arange(self.encoder_weights.shape[0]), labels=y_labels)
+        if highlight:
+            for i, j in enumerate(self.encoder_weights.argmax(axis=1)):
+                highlight_mask[i, j] = True
+            plt.imshow(
+                np.ma.masked_where(~highlight_mask, self.encoder_weights),
+                cmap=cmap_highlight,
+                alpha=0.5,
+                aspect=aspect,
+            )
+        plt.show()
+
+    def _get_tsne_repr(self, inputs=None, frac=None) -> None:
+        if inputs is None:
+            inputs = self.inputs.copy()
+            if frac:
+                n = int(frac * self.inputs.shape[0])
+                indexes = np.random.choice(np.arange(inputs.shape[0]), n, replace=False)
+                inputs = inputs[indexes]
+            inputs[np.isnan(inputs)] = 0.0
+        self.latent_representations = inputs @ self.encoder_weights
+
+        tsne = TSNE(n_components=2)
+        self.reduced_data_tsne = tsne.fit_transform(self.latent_representations)
+
+    def _viz_tsne_repr(self, **kwargs) -> None:
+        c = kwargs.get("c", None)
+        self.colors = (
+            kwargs.get("colors", self.sorted_names[: len(np.unique(c))]) if c is not None else None
+        )
+        plt.scatter(
+            self.reduced_data_tsne[:, 0],
+            self.reduced_data_tsne[:, 1],
+            cmap=matplotlib.colors.ListedColormap(self.colors) if c is not None else None,
+            c=c,
+        )
+        if c is not None:
+            cb = plt.colorbar()
+            loc = np.arange(0, max(c), max(c) / float(len(self.colors)))
+            cb.set_ticks(loc)
+            cb.set_ticklabels(np.unique(c))
+        plt.title("t-SNE Visualization of Latent Space")
+        plt.xlabel("t-SNE 1")
+        plt.ylabel("t-SNE 2")
+        plt.show()
 
 
 ########################################################################################
+
+if __name__ == "__main__":
+    # Example usage
+    import pandas as pd
+    from sklearn.datasets import load_iris
+    from sklearn.preprocessing import OneHotEncoder
+
+    # Load the dataset
+    iris = load_iris()
+
+    # Convert to a DataFrame for easy exploration
+    iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
+    iris_df["species"] = iris.target
+
+    X = iris_df.drop(columns="species")
+    y_labels = X.columns
+    X = X.values
+    y = iris_df["species"].values
+
+    X = np.asarray(X).astype(np.float32)
+
+    encoder = OneHotEncoder()
+    y = encoder.fit_transform(y.reshape(-1, 1)).toarray()
+    y = np.asarray(y).astype(np.float32)
+
+    model = AutoClassifier(
+        input_shape_parm=X.shape[1],
+        num_classes=3,
+        units=27,
+        activation="tanh",
+        num_layers=2,
+        dropout=0.2,
+    )
+    model.compile(
+        optimizer="adam",
+        loss=tf.keras.losses.CategoricalCrossentropy(),
+        metrics=[tf.keras.metrics.F1Score(threshold=0.5)],
+    )
+    model.fit(X, y, epochs=50, validation_split=0.2)
+
+    insights = GetInsights(model, X)
+    summary = insights.predictor_analyzer(frac=1.0, y_labels=y_labels)
+    insights._get_tsne_repr()
+    insights._viz_tsne_repr()
+    insights._viz_tsne_repr(c=iris_df["species"])
+    insights._viz_weights()
+    print(summary)
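
End to end, `setup_model` now selects the tuner via `method` and returns a `(model, best_hyperparameters)` pair instead of just the model. A hedged sketch, assuming `df` is a fully numeric, encoded DataFrame with a `"label"` target column; `epochs` and `train_size` are taken to be existing parameters of `setup_model`, per the `tuner.search` and `validation_split` lines above:

```python
model, best_hps = setup_model(
    data=df,
    target="label",
    epochs=20,
    train_size=0.8,
    method="RandomSearch",             # or the default "Hyperband"
    hyperparameters={"num_layers": 2}, # optionally pin part of the search space
)
print(best_hps)  # single-row DataFrame indexed by "Value"
```
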
likelihood/models/simulation.py
CHANGED
@@ -2,31 +2,25 @@ import pickle
 import warnings
 from typing import List, Tuple, Union
 
-import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 from pandas.core.frame import DataFrame
 
 from likelihood.tools import DataScaler, FeatureSelection, OneHotEncoder, cdf, check_nan_inf
 
-# Suppress RankWarning
 warnings.simplefilter("ignore", np.RankWarning)
 
 
 # --------------------------------------------------------------------------------------------------------------------------------------
 def categories_by_quartile(df: DataFrame, column: str) -> Tuple[str, str]:
-    # Count the frequency of each category in the column
     freq = df[column].value_counts()
 
-    # Calculate the 25th percentile (Q1) and 75th percentile (Q3)
     q1 = freq.quantile(0.25)
     q3 = freq.quantile(0.75)
 
-    # Filter categories that are below the 25th percentile and above the 75th percentile
     least_frequent = freq[freq <= q1]
     most_frequent = freq[freq >= q3]
 
-    # Get the least frequent category (25th percentile) and the most frequent category (75th percentile)
     least_frequent_category = least_frequent.idxmin() if not least_frequent.empty else None
     most_frequent_category = most_frequent.idxmax() if not most_frequent.empty else None
 
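
The deletions in `categories_by_quartile` are comment-only; it still returns the least frequent category at or below Q1 of the frequency distribution and the most frequent at or above Q3. An illustrative sketch:

```python
import pandas as pd

df = pd.DataFrame({"fruit": ["apple"] * 5 + ["pear"] * 3 + ["fig"]})
least, most = categories_by_quartile(df, "fruit")
# Frequencies are 5, 3, 1, so Q1 = 2.0 and Q3 = 4.0:
# least -> "fig" (1 <= Q1), most -> "apple" (5 >= Q3)
```
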
likelihood/models/utils.py
CHANGED
@@ -1,12 +1,10 @@
-import matplotlib.pyplot as plt
 import numpy as np
-from numpy import ndarray
 
 from likelihood.tools import cal_average
 
 
 class FeaturesArima:
-    def forward(self, y_sum: ndarray, theta: list, mode: bool, noise: float):
+    def forward(self, y_sum: np.ndarray, theta: list, mode: bool, noise: float):
         if mode:
             y_vec = []
 
@@ -31,20 +29,14 @@ class FeaturesArima:
 
         return np.array(y_vec)
 
-    def integrated(self, datapoints: ndarray):
+    def integrated(self, datapoints: np.ndarray):
         datapoints = self.datapoints
-        # n = datapoints.shape[0]
-
-        # y_sum = [
-        #     ((1.0 - datapoints[i - 1] / datapoints[i]) ** self.d) * datapoints[i]
-        #     for i in range(1, n)
-        # ]
         y_sum = list(np.diff(datapoints, self.d))
         y_sum.insert(0, datapoints[0])
 
         return np.array(y_sum)
 
-    def average(self, datapoints: ndarray):
+    def average(self, datapoints: np.ndarray):
         y_sum_average = cal_average(datapoints)
         y_sum_eps = datapoints - y_sum_average
 
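
The commented-out block removed from `integrated` was a hand-rolled differencing formula; the method keeps using `np.diff` of order `self.d` with the first datapoint prepended. A small numeric sketch of what that produces:

```python
import numpy as np

datapoints = np.array([10.0, 12.0, 15.0, 19.0])
d = 1  # differencing order, stored as self.d on FeaturesArima

y_sum = list(np.diff(datapoints, d))  # first differences: [2., 3., 4.]
y_sum.insert(0, datapoints[0])
print(np.array(y_sum))  # [10.  2.  3.  4.]
```
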

{likelihood-1.2.24.dist-info → likelihood-1.3.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.2
 Name: likelihood
-Version: 1.2.24
+Version: 1.3.0
 Summary: A package that performs the maximum likelihood algorithm.
 Home-page: https://github.com/jzsmoreno/likelihood/
 Author: J. A. Moreno-Guerra
@@ -13,7 +13,7 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: black[jupyter]
+Requires-Dist: black[jupyter]>=24.3.0
 Requires-Dist: mypy-extensions==1.0.0
 Requires-Dist: types-openpyxl==3.1.0.15
 Requires-Dist: pydocstyle==6.3.0
@@ -31,6 +31,18 @@ Requires-Dist: pyvis; extra == "full"
 Requires-Dist: tensorflow==2.15.0; extra == "full"
 Requires-Dist: keras-tuner; extra == "full"
 Requires-Dist: scikit-learn; extra == "full"
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: maintainer
+Dynamic: maintainer-email
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
 
 ![likelihood](
 

{likelihood-1.2.24.dist-info → likelihood-1.3.0.dist-info}/RECORD
CHANGED

@@ -1,20 +1,20 @@
 likelihood/__init__.py,sha256=5C0hapdsk85XZhN_rssRAEFpkRRuKNtj6cyRbqD2_gM,994
 likelihood/main.py,sha256=fcCkGOOWKjfvw2tLVqjuKPV8t0rVCIT9FlbYcOv4EYo,7974
 likelihood/graph/__init__.py,sha256=6TuFDfmXTwpLyHl7_KqBfdzW6zqHjGzIFvymjFPlvjI,21
-likelihood/graph/graph.py,sha256=
-likelihood/graph/nn.py,sha256
+likelihood/graph/graph.py,sha256=bLrNMvIh7GOTdPTwnNss8oPZ7cbSHQScAsH_ttmVUK0,3294
+likelihood/graph/nn.py,sha256=-OvHAeB3l2nd0ZeAk03cVDGBgaTn-WyGIsj5Rq7XeCY,12237
 likelihood/models/__init__.py,sha256=e6nB4w47w0Q9DrAFeP3OcUgcoHOtf7Il4mBhgf4AARg,52
 likelihood/models/hmm.py,sha256=0s0gFySH1u4NjRaZDxiZ8oeTaFhFrw1x0GJxwy3dFrA,6253
 likelihood/models/regression.py,sha256=9cakyGlJCEO6WfpoKLh3GxdXQeQp7cUvJIkQ5odT0TA,9404
-likelihood/models/simulation.py,sha256=
-likelihood/models/utils.py,sha256=
+likelihood/models/simulation.py,sha256=LFyE_szo7sDukviMLeg_6RoyAaI7yMXUy8f4mDOrGoc,8460
+likelihood/models/utils.py,sha256=dvigPi_hxcs5ntfHr7Y1JvP5ULtMW3kkN0nJpS4orE8,1319
 likelihood/models/deep/__init__.py,sha256=-KIPippVaMqgG8mEgYjNxYQdqOUcFhUuKhbVe8TTCfo,28
-likelihood/models/deep/autoencoders.py,sha256=
+likelihood/models/deep/autoencoders.py,sha256=KtEQhYhZcEUALjWuYeTtb2ASurluHcWzKl6c7kS6E78,21135
 likelihood/tools/__init__.py,sha256=MCjsCWfBNKE2uMN0VizDN1uFzZ_md0X2WZeBdWhrCR8,50
 likelihood/tools/numeric_tools.py,sha256=FA44kbiAcxcquz1el_g3Pqsp5ii8XFkAIrsMs5bGkj0,11445
 likelihood/tools/tools.py,sha256=iZBC7IHTFpAyxooyel7ZFi-5-G0nCotNLLtxenPw9T8,44303
-likelihood-1.
-likelihood-1.
-likelihood-1.
-likelihood-1.
-likelihood-1.
+likelihood-1.3.0.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
+likelihood-1.3.0.dist-info/METADATA,sha256=7-V4936jT_W1GHOxbaiBrM7uZhRzHCsxycGGxNq1fR0,2770
+likelihood-1.3.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+likelihood-1.3.0.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
+likelihood-1.3.0.dist-info/RECORD,,

{likelihood-1.2.24.dist-info → likelihood-1.3.0.dist-info}/LICENSE
File without changes

{likelihood-1.2.24.dist-info → likelihood-1.3.0.dist-info}/top_level.txt
File without changes