likelihood 1.2.24.tar.gz → 1.3.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {likelihood-1.2.24 → likelihood-1.3.0}/PKG-INFO +15 -3
  2. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood/graph/graph.py +3 -3
  3. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood/graph/nn.py +6 -8
  4. likelihood-1.3.0/likelihood/models/deep/autoencoders.py +617 -0
  5. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood/models/simulation.py +0 -6
  6. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood/models/utils.py +3 -11
  7. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood.egg-info/PKG-INFO +15 -3
  8. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood.egg-info/requires.txt +1 -1
  9. likelihood-1.2.24/likelihood/models/deep/autoencoders.py +0 -309
  10. {likelihood-1.2.24 → likelihood-1.3.0}/LICENSE +0 -0
  11. {likelihood-1.2.24 → likelihood-1.3.0}/README.md +0 -0
  12. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood/__init__.py +0 -0
  13. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood/graph/__init__.py +0 -0
  14. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood/main.py +0 -0
  15. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood/models/__init__.py +0 -0
  16. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood/models/deep/__init__.py +0 -0
  17. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood/models/hmm.py +0 -0
  18. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood/models/regression.py +0 -0
  19. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood/tools/__init__.py +0 -0
  20. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood/tools/numeric_tools.py +0 -0
  21. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood/tools/tools.py +0 -0
  22. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood.egg-info/SOURCES.txt +0 -0
  23. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood.egg-info/dependency_links.txt +0 -0
  24. {likelihood-1.2.24 → likelihood-1.3.0}/likelihood.egg-info/top_level.txt +0 -0
  25. {likelihood-1.2.24 → likelihood-1.3.0}/setup.cfg +0 -0
  26. {likelihood-1.2.24 → likelihood-1.3.0}/setup.py +0 -0
{likelihood-1.2.24 → likelihood-1.3.0}/PKG-INFO
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.2
  Name: likelihood
- Version: 1.2.24
+ Version: 1.3.0
  Summary: A package that performs the maximum likelihood algorithm.
  Home-page: https://github.com/jzsmoreno/likelihood/
  Author: J. A. Moreno-Guerra
@@ -13,7 +13,7 @@ Classifier: Operating System :: OS Independent
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: black[jupyter]==24.1.1
+ Requires-Dist: black[jupyter]>=24.3.0
  Requires-Dist: mypy-extensions==1.0.0
  Requires-Dist: types-openpyxl==3.1.0.15
  Requires-Dist: pydocstyle==6.3.0
@@ -31,6 +31,18 @@ Requires-Dist: pyvis; extra == "full"
  Requires-Dist: tensorflow==2.15.0; extra == "full"
  Requires-Dist: keras-tuner; extra == "full"
  Requires-Dist: scikit-learn; extra == "full"
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: maintainer
+ Dynamic: maintainer-email
+ Dynamic: provides-extra
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary

  ![likelihood](https://raw.githubusercontent.com/RodolfoFerro/likelihood/main/likelihood.png)

{likelihood-1.2.24 → likelihood-1.3.0}/likelihood/graph/graph.py
@@ -45,8 +45,8 @@ class DynamicGraph(FeatureSelection):

      def draw(self, name="graph.html", **kwargs) -> None:
          """Display the network using HTML format"""
-         spring_length = kwargs["spring_length"] if "spring_length" in kwargs else 500
-         node_distance = kwargs["node_distance"] if "node_distance" in kwargs else 100
+         spring_length = kwargs.get("spring_length", 500)
+         node_distance = kwargs.get("node_distance", 100)
          self.G.repulsion(node_distance=node_distance, spring_length=spring_length)
          self.G.show_buttons(filter_=["physics"])
          self.G.show(name)
@@ -89,5 +89,5 @@ if __name__ == "__main__":
      df["y"] = y
      # Instantiate DynamicGraph
      fs = DynamicGraph(df, n_importances=2)
-     print(fs.fit())
+     fs.fit()
      fs.draw()
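
The draw() change above swaps explicit membership tests for dict.get, which returns the supplied default when the key is absent; the print wrapper around fs.fit() is also dropped. A minimal, standalone sketch of the equivalence (the kwargs values here are hypothetical):

    kwargs = {"spring_length": 250}
    # 1.2.24 style: explicit membership test
    old = kwargs["spring_length"] if "spring_length" in kwargs else 500
    # 1.3.0 style: dict.get with a default; identical for present and absent keys
    new = kwargs.get("spring_length", 500)
    missing = kwargs.get("node_distance", 100)  # absent key falls back to 100
    assert old == new == 250 and missing == 100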
{likelihood-1.2.24 → likelihood-1.3.0}/likelihood/graph/nn.py
@@ -1,9 +1,9 @@
+ import logging
  import os

- os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
- # Suppress TensorFlow INFO logs
- os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
- import logging
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+ logging.getLogger("tensorflow").setLevel(logging.ERROR)
+
  import warnings
  from typing import List, Tuple

@@ -17,9 +17,7 @@ from sklearn.model_selection import train_test_split

  from likelihood.tools import generate_feature_yaml

- logging.getLogger("tensorflow").setLevel(logging.ERROR)
-
- tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+ tf.get_logger().setLevel("ERROR")


  def compare_similarity(arr1: np.ndarray, arr2: np.ndarray) -> int:
@@ -98,7 +96,7 @@ def cal_adjacency_matrix(

      assert len(df_categorical) > 0

-     similarity = kwargs["similarity"] if "similarity" in kwargs else len(df_categorical.columns) - 1
+     similarity = kwargs.get("similarity", len(df_categorical.columns) - 1)
      assert similarity <= df_categorical.shape[1]

      adj_dict = {}
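
The reordering at the top of nn.py matters: TF_CPP_MIN_LOG_LEVEL is read when TensorFlow's C++ runtime initializes at import, so setting it after `import tensorflow` has no effect. A minimal sketch of the required ordering (standalone, assuming TensorFlow is installed):

    import logging
    import os

    # Must run before TensorFlow is imported; "3" filters INFO, WARNING, and ERROR C++ logs
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
    logging.getLogger("tensorflow").setLevel(logging.ERROR)

    import tensorflow as tf  # noqa: E402  (deliberately imported after the env var is set)

    # Python-side logger; replaces the deprecated tf.compat.v1.logging call removed above
    tf.get_logger().setLevel("ERROR")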
likelihood-1.3.0/likelihood/models/deep/autoencoders.py (new file)
@@ -0,0 +1,617 @@
+ import logging
+ import os
+ import random
+ from functools import partial
+ from shutil import rmtree
+
+ import matplotlib
+ import matplotlib.colors as mcolors
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import pandas as pd
+ from pandas.plotting import radviz
+
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+ logging.getLogger("tensorflow").setLevel(logging.ERROR)
+
+ import warnings
+ from functools import wraps
+
+ import keras_tuner
+ import tensorflow as tf
+ from pandas.core.frame import DataFrame
+ from sklearn.manifold import TSNE
+
+ from likelihood.tools import OneHotEncoder
+
+ tf.get_logger().setLevel("ERROR")
+
+
+ def suppress_warnings(func):
+     @wraps(func)
+     def wrapper(*args, **kwargs):
+         with warnings.catch_warnings():
+             warnings.simplefilter("ignore")
+             return func(*args, **kwargs)
+
+     return wrapper
+
+
+ @tf.keras.utils.register_keras_serializable(package="Custom", name="AutoClassifier")
+ class AutoClassifier(tf.keras.Model):
+     """
+     An auto-classifier model that automatically determines the best classification strategy based on the input data.
+
+     Attributes:
+     - input_shape_parm: The shape of the input data.
+     - num_classes: The number of classes in the dataset.
+     - units: The number of neurons in each hidden layer.
+     - activation: The type of activation function to use for the neural network layers.
+
+     Methods:
+     __init__(self, input_shape_parm, num_classes, units, activation): Initializes an AutoClassifier instance with the given parameters.
+     build(self, input_shape_parm): Builds the model architecture based on input_shape_parm.
+     call(self, x): Defines the forward pass of the model.
+     get_config(self): Returns the configuration of the model.
+     from_config(cls, config): Recreates an instance of AutoClassifier from its configuration.
+     """
+
+     def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
+         """
+         Initializes an AutoClassifier instance with the given parameters.
+
+         Parameters
+         ----------
+         input_shape_parm : `int`
+             The shape of the input data.
+         num_classes : `int`
+             The number of classes in the dataset.
+         units : `int`
+             The number of neurons in each hidden layer.
+         activation : `str`
+             The type of activation function to use for the neural network layers.
+
+         Keyword Arguments:
+         ----------
+         Additional keyword arguments to pass to the model.
+
+         classifier_activation : `str`
+             The activation function to use for the classifier layer. Default is "softmax". If the activation function is not a classification function, the model can be used in regression problems.
+         num_layers : `int`
+             The number of hidden layers in the classifier. Default is 1.
+         dropout : `float`
+             The dropout rate to use in the classifier. Default is None.
+         """
+         super(AutoClassifier, self).__init__()
+         self.input_shape_parm = input_shape_parm
+         self.num_classes = num_classes
+         self.units = units
+         self.activation = activation
+
+         self.encoder = None
+         self.decoder = None
+         self.classifier = None
+         self.classifier_activation = kwargs.get("classifier_activation", "softmax")
+         self.num_layers = kwargs.get("num_layers", 1)
+         self.dropout = kwargs.get("dropout", None)
+
+     def build(self, input_shape):
+         self.encoder = tf.keras.Sequential(
+             [
+                 tf.keras.layers.Dense(units=self.units, activation=self.activation),
+                 tf.keras.layers.Dense(units=int(self.units / 2), activation=self.activation),
+             ]
+         )
+
+         self.decoder = tf.keras.Sequential(
+             [
+                 tf.keras.layers.Dense(units=self.units, activation=self.activation),
+                 tf.keras.layers.Dense(units=self.input_shape_parm, activation=self.activation),
+             ]
+         )
+
+         self.classifier = tf.keras.Sequential()
+         if self.num_layers > 1:
+             for _ in range(self.num_layers - 1):
+                 self.classifier.add(
+                     tf.keras.layers.Dense(units=self.units, activation=self.activation)
+                 )
+                 if self.dropout:
+                     self.classifier.add(tf.keras.layers.Dropout(self.dropout))
+         self.classifier.add(
+             tf.keras.layers.Dense(units=self.num_classes, activation=self.classifier_activation)
+         )
+
+     def call(self, x):
+         encoded = self.encoder(x)
+         decoded = self.decoder(encoded)
+         combined = tf.concat([decoded, encoded], axis=1)
+         classification = self.classifier(combined)
+         return classification
+
+     def get_config(self):
+         config = {
+             "input_shape_parm": self.input_shape_parm,
+             "num_classes": self.num_classes,
+             "units": self.units,
+             "activation": self.activation,
+             "classifier_activation": self.classifier_activation,
+             "num_layers": self.num_layers,
+             "dropout": self.dropout,
+         }
+         base_config = super(AutoClassifier, self).get_config()
+         return dict(list(base_config.items()) + list(config.items()))
+
+     @classmethod
+     def from_config(cls, config):
+         return cls(
+             input_shape_parm=config["input_shape_parm"],
+             num_classes=config["num_classes"],
+             units=config["units"],
+             activation=config["activation"],
+             classifier_activation=config["classifier_activation"],
+             num_layers=config["num_layers"],
+             dropout=config["dropout"],
+         )
+
+
+ def call_existing_code(
+     units: int,
+     activation: str,
+     threshold: float,
+     optimizer: str,
+     input_shape_parm: None | int = None,
+     num_classes: None | int = None,
+     num_layers: int = 1,
+     **kwargs,
+ ) -> AutoClassifier:
+     """
+     Calls an existing AutoClassifier instance.
+
+     Parameters
+     ----------
+     units : `int`
+         The number of neurons in each hidden layer.
+     activation : `str`
+         The type of activation function to use for the neural network layers.
+     threshold : `float`
+         The threshold for the classifier.
+     optimizer : `str`
+         The type of optimizer to use for the neural network layers.
+     input_shape_parm : `None` | `int`
+         The shape of the input data.
+     num_classes : `int`
+         The number of classes in the dataset.
+
+     Returns
+     -------
+     `AutoClassifier`
+         The AutoClassifier instance.
+     """
+     dropout = kwargs.get("dropout", None)
+     model = AutoClassifier(
+         input_shape_parm=input_shape_parm,
+         num_classes=num_classes,
+         units=units,
+         activation=activation,
+         num_layers=num_layers,
+         dropout=dropout,
+     )
+     model.compile(
+         optimizer=optimizer,
+         loss=tf.keras.losses.CategoricalCrossentropy(),
+         metrics=[tf.keras.metrics.F1Score(threshold=threshold)],
+     )
+     return model
+
+
+ def build_model(
+     hp, input_shape_parm: None | int, num_classes: None | int, **kwargs
+ ) -> AutoClassifier:
+     """Builds a neural network model using Keras Tuner's search algorithm.
+
+     Parameters
+     ----------
+     hp : `keras_tuner.HyperParameters`
+         The hyperparameters to tune.
+     input_shape_parm : `None` | `int`
+         The shape of the input data.
+     num_classes : `int`
+         The number of classes in the dataset.
+
+     Keyword Arguments:
+     ----------
+     Additional keyword arguments to pass to the model.
+
+     hyperparameters : `dict`
+         The hyperparameters to set.
+
+     Returns
+     -------
+     `keras.Model`
+         The neural network model.
+     """
+     hyperparameters = kwargs.get("hyperparameters", None)
+     hyperparameters_keys = hyperparameters.keys() if hyperparameters is not None else []
+
+     units = (
+         hp.Int(
+             "units",
+             min_value=int(input_shape_parm * 0.2),
+             max_value=int(input_shape_parm * 1.5),
+             step=2,
+         )
+         if "units" not in hyperparameters_keys
+         else hyperparameters["units"]
+     )
+     activation = (
+         hp.Choice("activation", ["sigmoid", "relu", "tanh", "selu", "softplus", "softsign"])
+         if "activation" not in hyperparameters_keys
+         else hyperparameters["activation"]
+     )
+     optimizer = (
+         hp.Choice("optimizer", ["sgd", "adam", "adadelta", "rmsprop", "adamax", "adagrad"])
+         if "optimizer" not in hyperparameters_keys
+         else hyperparameters["optimizer"]
+     )
+     threshold = (
+         hp.Float("threshold", min_value=0.1, max_value=0.9, sampling="log")
+         if "threshold" not in hyperparameters_keys
+         else hyperparameters["threshold"]
+     )
+     num_layers = (
+         hp.Int("num_layers", min_value=1, max_value=10, step=1)
+         if "num_layers" not in hyperparameters_keys
+         else hyperparameters["num_layers"]
+     )
+     dropout = (
+         hp.Float("dropout", min_value=0.1, max_value=0.9, sampling="log")
+         if "dropout" not in hyperparameters_keys
+         else hyperparameters["dropout"]
+     )
+
+     model = call_existing_code(
+         units=units,
+         activation=activation,
+         threshold=threshold,
+         optimizer=optimizer,
+         input_shape_parm=input_shape_parm,
+         num_classes=num_classes,
+         num_layers=num_layers,
+         dropout=dropout,
+     )
+     return model
+
+
+ @suppress_warnings
+ def setup_model(
+     data: DataFrame,
+     target: str,
+     epochs: int,
+     train_size: float = 0.7,
+     seed=None,
+     train_mode: bool = True,
+     filepath: str = "./my_dir/best_model",
+     method: str = "Hyperband",
+     **kwargs,
+ ) -> AutoClassifier:
+     """Setup model for training and tuning.
+
+     Parameters
+     ----------
+     data : `DataFrame`
+         The dataset to train the model on.
+     target : `str`
+         The name of the target column.
+     epochs : `int`
+         The number of epochs to train the model for.
+     train_size : `float`
+         The proportion of the dataset to use for training.
+     seed : `Any` | `int`
+         The random seed to use for reproducibility.
+     train_mode : `bool`
+         Whether to train the model or not.
+     filepath : `str`
+         The path to save the best model to.
+     method : `str`
+         The method to use for hyperparameter tuning. Options are "Hyperband" and "RandomSearch".
+
+     Keyword Arguments:
+     ----------
+     Additional keyword arguments to pass to the model.
+
+     max_trials : `int`
+         The maximum number of trials to perform.
+     directory : `str`
+         The directory to save the model to.
+     project_name : `str`
+         The name of the project.
+     objective : `str`
+         The objective to optimize.
+     verbose : `bool`
+         Whether to print verbose output.
+     hyperparameters : `dict`
+         The hyperparameters to set.
+
+     Returns
+     -------
+     model : `AutoClassifier`
+         The trained model.
+     """
+     max_trials = kwargs.get("max_trials", 10)
+     directory = kwargs.get("directory", "./my_dir")
+     project_name = kwargs.get("project_name", "get_best")
+     objective = kwargs.get("objective", "val_loss")
+     verbose = kwargs.get("verbose", True)
+     hyperparameters = kwargs.get("hyperparameters", None)
+
+     X = data.drop(columns=target)
+     input_sample = X.sample(1)
+     y = data[target]
+     assert (
+         X.select_dtypes(include=["object"]).empty == True
+     ), "Categorical variables within the DataFrame must be encoded, this is done by using the DataFrameEncoder from likelihood."
+     validation_split = 1.0 - train_size
+
+     if train_mode:
+         try:
+             if (not os.path.exists(directory)) and directory != "./":
+                 os.makedirs(directory)
+             elif directory != "./":
+                 print(f"Directory {directory} already exists, it will be deleted.")
+                 rmtree(directory)
+                 os.makedirs(directory)
+         except:
+             print("Warning: unable to create directory")
+
+         y_encoder = OneHotEncoder()
+         y = y_encoder.encode(y.to_list())
+         X = X.to_numpy()
+         input_sample.to_numpy()
+         X = np.asarray(X).astype(np.float32)
+         input_sample = np.asarray(input_sample).astype(np.float32)
+         y = np.asarray(y).astype(np.float32)
+
+         input_shape_parm = X.shape[1]
+         num_classes = y.shape[1]
+         global build_model
+         build_model = partial(
+             build_model,
+             input_shape_parm=input_shape_parm,
+             num_classes=num_classes,
+             hyperparameters=hyperparameters,
+         )
+
+         if method == "Hyperband":
+             tuner = keras_tuner.Hyperband(
+                 hypermodel=build_model,
+                 objective=objective,
+                 max_epochs=epochs,
+                 factor=3,
+                 directory=directory,
+                 project_name=project_name,
+                 seed=seed,
+             )
+         elif method == "RandomSearch":
+             tuner = keras_tuner.RandomSearch(
+                 hypermodel=build_model,
+                 objective=objective,
+                 max_trials=max_trials,
+                 directory=directory,
+                 project_name=project_name,
+                 seed=seed,
+             )
+
+         tuner.search(X, y, epochs=epochs, validation_split=validation_split, verbose=verbose)
+         models = tuner.get_best_models(num_models=2)
+         best_model = models[0]
+         best_model(input_sample)
+
+         best_model.save(filepath, save_format="tf")
+
+         if verbose:
+             tuner.results_summary()
+     else:
+         best_model = tf.keras.models.load_model(filepath)
+
+     best_hps = tuner.get_best_hyperparameters(1)[0].values
+     return best_model, pd.DataFrame(best_hps, index=["Value"])
+
+
+ class GetInsights:
+     def __init__(self, model: AutoClassifier, inputs: np.ndarray) -> None:
+         self.inputs = inputs
+         self.model = model
+         self.encoder_layer = self.model.encoder.layers[0]
+         self.decoder_layer = self.model.decoder.layers[0]
+         self.encoder_weights = self.encoder_layer.get_weights()[0]
+         self.decoder_weights = self.decoder_layer.get_weights()[0]
+         colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)
+
+         by_hsv = sorted(
+             (tuple(mcolors.rgb_to_hsv(mcolors.to_rgba(color)[:3])), name)
+             for name, color in colors.items()
+         )
+         self.sorted_names = [name for hsv, name in by_hsv if hsv[1] > 0.4 and hsv[2] >= 0.4]
+         random.shuffle(self.sorted_names)
+
+     def predictor_analyzer(
+         self,
+         frac=None,
+         cmap: str = "viridis",
+         aspect: str = "auto",
+         highlight: bool = True,
+         **kwargs,
+     ) -> None:
+         self._viz_weights(cmap=cmap, aspect=aspect, highlight=highlight, **kwargs)
+         inputs = self.inputs.copy()
+         y_labels = kwargs.get("y_labels", None)
+         if frac:
+             n = int(frac * self.inputs.shape[0])
+             indexes = np.random.choice(np.arange(inputs.shape[0]), n, replace=False)
+             inputs = inputs[indexes]
+         inputs[np.isnan(inputs)] = 0.0
+         encoded = self.model.encoder(inputs)
+         reconstructed = self.model.decoder(encoded)
+         combined = tf.concat([reconstructed, encoded], axis=1)
+         self.classification = self.model.classifier(combined).numpy().argmax(axis=1)
+         ax = plt.subplot(1, 2, 1)
+         plt.imshow(self.inputs, cmap=cmap, aspect=aspect)
+         plt.colorbar()
+         plt.title("Original Data")
+         plt.subplot(1, 2, 2, sharex=ax, sharey=ax)
+         plt.imshow(reconstructed, cmap=cmap, aspect=aspect)
+         plt.colorbar()
+         plt.title("Decoder Layer Reconstruction")
+         plt.show()
+
+         self._get_tsne_repr(inputs=inputs, frac=frac)
+         self._viz_tsne_repr(c=self.classification)
+
+         self.data = pd.DataFrame(encoded, columns=[f"Feature {i}" for i in range(encoded.shape[1])])
+         self.data_input = pd.DataFrame(
+             inputs,
+             columns=(
+                 [f"Feature {i}" for i in range(inputs.shape[1])] if y_labels is None else y_labels
+             ),
+         )
+         self.data["class"] = self.classification
+         self.data_input["class"] = self.classification
+         radviz(self.data, "class", color=self.colors)
+         plt.title("Radviz Visualization of Latent Space")
+         plt.show()
+
+         radviz(self.data_input, "class", color=self.colors)
+         plt.title("Radviz Visualization of Input Data")
+         plt.show()
+         return self._statistics(self.data_input)
+
+     def _statistics(self, data_input: DataFrame, **kwargs) -> DataFrame:
+         data = data_input.copy(deep=True)
+
+         if not pd.api.types.is_string_dtype(data["class"]):
+             data["class"] = data["class"].astype(str)
+
+         data.ffill(inplace=True)
+         grouped_data = data.groupby("class")
+
+         numerical_stats = grouped_data.agg(["mean", "min", "max", "std", "median"])
+         numerical_stats.columns = ["_".join(col).strip() for col in numerical_stats.columns.values]
+
+         def get_mode(x):
+             mode_series = x.mode()
+             return mode_series.iloc[0] if not mode_series.empty else None
+
+         mode_stats = grouped_data.apply(get_mode, include_groups=False)
+         mode_stats.columns = [f"{col}_mode" for col in mode_stats.columns]
+         combined_stats = pd.concat([numerical_stats, mode_stats], axis=1)
+
+         return combined_stats.T
+
+     def _viz_weights(
+         self, cmap: str = "viridis", aspect: str = "auto", highlight: bool = True, **kwargs
+     ) -> None:
+         title = kwargs.get("title", "Encoder Layer Weights (Dense Layer)")
+         y_labels = kwargs.get("y_labels", None)
+         cmap_highlight = kwargs.get("cmap_highlight", "Pastel1")
+         highlight_mask = np.zeros_like(self.encoder_weights, dtype=bool)
+
+         plt.imshow(self.encoder_weights, cmap=cmap, aspect=aspect)
+         plt.colorbar()
+         plt.title(title)
+         if y_labels is not None:
+             plt.yticks(ticks=np.arange(self.encoder_weights.shape[0]), labels=y_labels)
+         if highlight:
+             for i, j in enumerate(self.encoder_weights.argmax(axis=1)):
+                 highlight_mask[i, j] = True
+             plt.imshow(
+                 np.ma.masked_where(~highlight_mask, self.encoder_weights),
+                 cmap=cmap_highlight,
+                 alpha=0.5,
+                 aspect=aspect,
+             )
+         plt.show()
+
+     def _get_tsne_repr(self, inputs=None, frac=None) -> None:
+         if inputs is None:
+             inputs = self.inputs.copy()
+         if frac:
+             n = int(frac * self.inputs.shape[0])
+             indexes = np.random.choice(np.arange(inputs.shape[0]), n, replace=False)
+             inputs = inputs[indexes]
+         inputs[np.isnan(inputs)] = 0.0
+         self.latent_representations = inputs @ self.encoder_weights
+
+         tsne = TSNE(n_components=2)
+         self.reduced_data_tsne = tsne.fit_transform(self.latent_representations)
+
+     def _viz_tsne_repr(self, **kwargs) -> None:
+         c = kwargs.get("c", None)
+         self.colors = (
+             kwargs.get("colors", self.sorted_names[: len(np.unique(c))]) if c is not None else None
+         )
+         plt.scatter(
+             self.reduced_data_tsne[:, 0],
+             self.reduced_data_tsne[:, 1],
+             cmap=matplotlib.colors.ListedColormap(self.colors) if c is not None else None,
+             c=c,
+         )
+         if c is not None:
+             cb = plt.colorbar()
+             loc = np.arange(0, max(c), max(c) / float(len(self.colors)))
+             cb.set_ticks(loc)
+             cb.set_ticklabels(np.unique(c))
+         plt.title("t-SNE Visualization of Latent Space")
+         plt.xlabel("t-SNE 1")
+         plt.ylabel("t-SNE 2")
+         plt.show()
+
+
+ ########################################################################################
+
+ if __name__ == "__main__":
+     # Example usage
+     import pandas as pd
+     from sklearn.datasets import load_iris
+     from sklearn.preprocessing import OneHotEncoder
+
+     # Load the dataset
+     iris = load_iris()
+
+     # Convert to a DataFrame for easy exploration
+     iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
+     iris_df["species"] = iris.target
+
+     X = iris_df.drop(columns="species")
+     y_labels = X.columns
+     X = X.values
+     y = iris_df["species"].values
+
+     X = np.asarray(X).astype(np.float32)
+
+     encoder = OneHotEncoder()
+     y = encoder.fit_transform(y.reshape(-1, 1)).toarray()
+     y = np.asarray(y).astype(np.float32)
+
+     model = AutoClassifier(
+         input_shape_parm=X.shape[1],
+         num_classes=3,
+         units=27,
+         activation="tanh",
+         num_layers=2,
+         dropout=0.2,
+     )
+     model.compile(
+         optimizer="adam",
+         loss=tf.keras.losses.CategoricalCrossentropy(),
+         metrics=[tf.keras.metrics.F1Score(threshold=0.5)],
+     )
+     model.fit(X, y, epochs=50, validation_split=0.2)
+
+     insights = GetInsights(model, X)
+     summary = insights.predictor_analyzer(frac=1.0, y_labels=y_labels)
+     insights._get_tsne_repr()
+     insights._viz_tsne_repr()
+     insights._viz_tsne_repr(c=iris_df["species"])
+     insights._viz_weights()
+     print(summary)
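
Because AutoClassifier is registered via register_keras_serializable and implements get_config/from_config, the SavedModel written by setup_model can be reloaded without passing custom_objects. A minimal round-trip sketch (assumes the module above is importable as likelihood.models.deep.autoencoders and the TensorFlow 2.15 pinned by the "full" extra):

    import numpy as np
    import tensorflow as tf

    from likelihood.models.deep.autoencoders import AutoClassifier

    model = AutoClassifier(input_shape_parm=4, num_classes=3, units=8, activation="tanh")
    model.compile(optimizer="adam", loss=tf.keras.losses.CategoricalCrossentropy())
    model(np.zeros((1, 4), dtype=np.float32))  # one dummy batch so build() creates weights

    model.save("auto_clf", save_format="tf")           # SavedModel, as setup_model does
    restored = tf.keras.models.load_model("auto_clf")  # no custom_objects needed
    assert isinstance(restored.get_config(), dict)     # config round-trips via from_config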
{likelihood-1.2.24 → likelihood-1.3.0}/likelihood/models/simulation.py
@@ -2,31 +2,25 @@ import pickle
  import warnings
  from typing import List, Tuple, Union

- import matplotlib.pyplot as plt
  import numpy as np
  import pandas as pd
  from pandas.core.frame import DataFrame

  from likelihood.tools import DataScaler, FeatureSelection, OneHotEncoder, cdf, check_nan_inf

- # Suppress RankWarning
  warnings.simplefilter("ignore", np.RankWarning)


  # --------------------------------------------------------------------------------------------------------------------------------------
  def categories_by_quartile(df: DataFrame, column: str) -> Tuple[str, str]:
-     # Count the frequency of each category in the column
      freq = df[column].value_counts()

-     # Calculate the 25th percentile (Q1) and 75th percentile (Q3)
      q1 = freq.quantile(0.25)
      q3 = freq.quantile(0.75)

-     # Filter categories that are below the 25th percentile and above the 75th percentile
      least_frequent = freq[freq <= q1]
      most_frequent = freq[freq >= q3]

-     # Get the least frequent category (25th percentile) and the most frequent category (75th percentile)
      least_frequent_category = least_frequent.idxmin() if not least_frequent.empty else None
      most_frequent_category = most_frequent.idxmax() if not most_frequent.empty else None

{likelihood-1.2.24 → likelihood-1.3.0}/likelihood/models/utils.py
@@ -1,12 +1,10 @@
- import matplotlib.pyplot as plt
  import numpy as np
- from numpy import ndarray

  from likelihood.tools import cal_average


  class FeaturesArima:
-     def forward(self, y_sum: ndarray, theta: list, mode: bool, noise: float):
+     def forward(self, y_sum: np.ndarray, theta: list, mode: bool, noise: float):
          if mode:
              y_vec = []

@@ -31,20 +29,14 @@ class FeaturesArima:

          return np.array(y_vec)

-     def integrated(self, datapoints: ndarray):
+     def integrated(self, datapoints: np.ndarray):
          datapoints = self.datapoints
-         # n = datapoints.shape[0]
-
-         # y_sum = [
-         #     ((1.0 - datapoints[i - 1] / datapoints[i]) ** self.d) * datapoints[i]
-         #     for i in range(1, n)
-         # ]
          y_sum = list(np.diff(datapoints, self.d))
          y_sum.insert(0, datapoints[0])

          return np.array(y_sum)

-     def average(self, datapoints: ndarray):
+     def average(self, datapoints: np.ndarray):
          y_sum_average = cal_average(datapoints)
          y_sum_eps = datapoints - y_sum_average

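The block of commented-out manual differencing removed from integrated() was already superseded by np.diff(datapoints, self.d), whose second argument is the order of the discrete difference used in ARIMA integration. A small standalone example of what that call computes:

    import numpy as np

    datapoints = np.array([1.0, 4.0, 9.0, 16.0, 25.0])
    print(np.diff(datapoints, 1))  # [3. 5. 7. 9.]  adjacent differences
    print(np.diff(datapoints, 2))  # [2. 2. 2.]     differences of the differences

    # As in integrated() with d = 2: re-anchor the series on its first observation
    y_sum = list(np.diff(datapoints, 2))
    y_sum.insert(0, datapoints[0])
    print(np.array(y_sum))  # [1. 2. 2. 2.]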
{likelihood-1.2.24 → likelihood-1.3.0}/likelihood.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.2
  Name: likelihood
- Version: 1.2.24
+ Version: 1.3.0
  Summary: A package that performs the maximum likelihood algorithm.
  Home-page: https://github.com/jzsmoreno/likelihood/
  Author: J. A. Moreno-Guerra
@@ -13,7 +13,7 @@ Classifier: Operating System :: OS Independent
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: black[jupyter]==24.1.1
+ Requires-Dist: black[jupyter]>=24.3.0
  Requires-Dist: mypy-extensions==1.0.0
  Requires-Dist: types-openpyxl==3.1.0.15
  Requires-Dist: pydocstyle==6.3.0
@@ -31,6 +31,18 @@ Requires-Dist: pyvis; extra == "full"
  Requires-Dist: tensorflow==2.15.0; extra == "full"
  Requires-Dist: keras-tuner; extra == "full"
  Requires-Dist: scikit-learn; extra == "full"
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: maintainer
+ Dynamic: maintainer-email
+ Dynamic: provides-extra
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary

  ![likelihood](https://raw.githubusercontent.com/RodolfoFerro/likelihood/main/likelihood.png)

{likelihood-1.2.24 → likelihood-1.3.0}/likelihood.egg-info/requires.txt
@@ -1,4 +1,4 @@
- black[jupyter]==24.1.1
+ black[jupyter]>=24.3.0
  mypy-extensions==1.0.0
  types-openpyxl==3.1.0.15
  pydocstyle==6.3.0
likelihood-1.2.24/likelihood/models/deep/autoencoders.py (removed in 1.3.0)
@@ -1,309 +0,0 @@
- import logging
- import os
- from functools import partial
- from shutil import rmtree
-
- import keras_tuner
- import numpy as np
- import pandas as pd
- import tensorflow as tf
- from pandas.core.frame import DataFrame
-
- from likelihood.tools import OneHotEncoder
-
- logging.getLogger("tensorflow").setLevel(logging.ERROR)
-
- tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
-
-
- @tf.keras.utils.register_keras_serializable(package="Custom", name="AutoClassifier")
- class AutoClassifier(tf.keras.Model):
-     """
-     An auto-classifier model that automatically determines the best classification strategy based on the input data.
-
-     Attributes:
-     - input_shape_parm: The shape of the input data.
-     - num_classes: The number of classes in the dataset.
-     - units: The number of neurons in each hidden layer.
-     - activation: The type of activation function to use for the neural network layers.
-
-     Methods:
-     __init__(self, input_shape_parm, num_classes, units, activation): Initializes an AutoClassifier instance with the given parameters.
-     build(self, input_shape_parm): Builds the model architecture based on input_shape_parm.
-     call(self, x): Defines the forward pass of the model.
-     get_config(self): Returns the configuration of the model.
-     from_config(cls, config): Recreates an instance of AutoClassifier from its configuration.
-     """
-
-     def __init__(self, input_shape_parm, num_classes, units, activation):
-         """
-         Initializes an AutoClassifier instance with the given parameters.
-
-         Parameters
-         ----------
-         input_shape_parm : `int`
-             The shape of the input data.
-         num_classes : `int`
-             The number of classes in the dataset.
-         units : `int`
-             The number of neurons in each hidden layer.
-         activation : `str`
-             The type of activation function to use for the neural network layers.
-         """
-         super(AutoClassifier, self).__init__()
-         self.input_shape_parm = input_shape_parm
-         self.num_classes = num_classes
-         self.units = units
-         self.activation = activation
-
-         self.encoder = None
-         self.decoder = None
-         self.classifier = None
-
-     def build(self, input_shape):
-         self.encoder = tf.keras.Sequential(
-             [
-                 tf.keras.layers.Dense(units=self.units, activation=self.activation),
-                 tf.keras.layers.Dense(units=int(self.units / 2), activation=self.activation),
-             ]
-         )
-
-         self.decoder = tf.keras.Sequential(
-             [
-                 tf.keras.layers.Dense(units=self.units, activation=self.activation),
-                 tf.keras.layers.Dense(units=self.input_shape_parm, activation=self.activation),
-             ]
-         )
-
-         self.classifier = tf.keras.Sequential(
-             [tf.keras.layers.Dense(self.num_classes, activation="softmax")]
-         )
-
-     def call(self, x):
-         encoded = self.encoder(x)
-         decoded = self.decoder(encoded)
-         combined = tf.concat([decoded, encoded], axis=1)
-         classification = self.classifier(combined)
-         return classification
-
-     def get_config(self):
-         config = {
-             "input_shape_parm": self.input_shape_parm,
-             "num_classes": self.num_classes,
-             "units": self.units,
-             "activation": self.activation,
-         }
-         base_config = super(AutoClassifier, self).get_config()
-         return dict(list(base_config.items()) + list(config.items()))
-
-     @classmethod
-     def from_config(cls, config):
-         return cls(
-             input_shape_parm=config["input_shape_parm"],
-             num_classes=config["num_classes"],
-             units=config["units"],
-             activation=config["activation"],
-         )
-
-
- def call_existing_code(
-     units: int,
-     activation: str,
-     threshold: float,
-     optimizer: str,
-     input_shape_parm: None | int = None,
-     num_classes: None | int = None,
- ) -> AutoClassifier:
-     """
-     Calls an existing AutoClassifier instance.
-
-     Parameters
-     ----------
-     units : `int`
-         The number of neurons in each hidden layer.
-     activation : `str`
-         The type of activation function to use for the neural network layers.
-     threshold : `float`
-         The threshold for the classifier.
-     optimizer : `str`
-         The type of optimizer to use for the neural network layers.
-     input_shape_parm : `None` | `int`
-         The shape of the input data.
-     num_classes : `int`
-         The number of classes in the dataset.
-
-     Returns
-     -------
-     `AutoClassifier`
-         The AutoClassifier instance.
-     """
-     model = AutoClassifier(
-         input_shape_parm=input_shape_parm,
-         num_classes=num_classes,
-         units=units,
-         activation=activation,
-     )
-     model.compile(
-         optimizer=optimizer,
-         loss=tf.keras.losses.CategoricalCrossentropy(),
-         metrics=[tf.keras.metrics.F1Score(threshold=threshold)],
-     )
-     return model
-
-
- def build_model(hp, input_shape_parm: None | int, num_classes: None | int) -> AutoClassifier:
-     """Builds a neural network model using Keras Tuner's search algorithm.
-
-     Parameters
-     ----------
-     hp : `keras_tuner.HyperParameters`
-         The hyperparameters to tune.
-     input_shape_parm : `None` | `int`
-         The shape of the input data.
-     num_classes : `int`
-         The number of classes in the dataset.
-
-     Returns
-     -------
-     `keras.Model`
-         The neural network model.
-     """
-     units = hp.Int(
-         "units", min_value=int(input_shape_parm * 0.2), max_value=input_shape_parm, step=2
-     )
-     activation = hp.Choice("activation", ["sigmoid", "relu", "tanh", "selu", "softplus"])
-     optimizer = hp.Choice("optimizer", ["sgd", "adam", "adadelta"])
-     threshold = hp.Float("threshold", min_value=0.1, max_value=0.9, sampling="log")
-
-     model = call_existing_code(
-         units=units,
-         activation=activation,
-         threshold=threshold,
-         optimizer=optimizer,
-         input_shape_parm=input_shape_parm,
-         num_classes=num_classes,
-     )
-     return model
-
-
- def setup_model(
-     data: DataFrame,
-     target: str,
-     epochs: int,
-     train_size: float = 0.7,
-     seed=None,
-     train_mode: bool = True,
-     filepath: str = "./my_dir/best_model",
-     **kwargs,
- ) -> AutoClassifier:
-     """Setup model for training and tuning.
-
-     Parameters
-     ----------
-     data : `DataFrame`
-         The dataset to train the model on.
-     target : `str`
-         The name of the target column.
-     epochs : `int`
-         The number of epochs to train the model for.
-     train_size : `float`
-         The proportion of the dataset to use for training.
-     seed : `Any` | `int`
-         The random seed to use for reproducibility.
-     train_mode : `bool`
-         Whether to train the model or not.
-     filepath : `str`
-         The path to save the best model to.
-
-     Keyword Arguments:
-     ----------
-     Additional keyword arguments to pass to the model.
-
-     max_trials : `int`
-         The maximum number of trials to perform.
-     directory : `str`
-         The directory to save the model to.
-     project_name : `str`
-         The name of the project.
-     objective : `str`
-         The objective to optimize.
-     verbose : `bool`
-         Whether to print verbose output.
-
-     Returns
-     -------
-     model : `AutoClassifier`
-         The trained model.
-     """
-     max_trials = kwargs["max_trials"] if "max_trials" in kwargs else 10
-     directory = kwargs["directory"] if "directory" in kwargs else "./my_dir"
-     project_name = kwargs["project_name"] if "project_name" in kwargs else "get_best"
-     objective = kwargs["objective"] if "objective" in kwargs else "val_loss"
-     verbose = kwargs["verbose"] if "verbose" in kwargs else True
-
-     X = data.drop(columns=target)
-     input_sample = X.sample(1)
-     y = data[target]
-     # Verify if there are categorical columns in the dataframe
-     assert (
-         X.select_dtypes(include=["object"]).empty == True
-     ), "Categorical variables within the DataFrame must be encoded, this is done by using the DataFrameEncoder from likelihood."
-     validation_split = 1.0 - train_size
-     # Create my_dir path if it does not exist
-
-     if train_mode:
-         # Create a new directory if it does not exist
-         try:
-             if (not os.path.exists(directory)) and directory != "./":
-                 os.makedirs(directory)
-             elif directory != "./":
-                 print(f"Directory {directory} already exists, it will be deleted.")
-                 rmtree(directory)
-                 os.makedirs(directory)
-         except:
-             print("Warning: unable to create directory")
-
-         # Create a Classifier instance
-         y_encoder = OneHotEncoder()
-         y = y_encoder.encode(y.to_list())
-         X = X.to_numpy()
-         input_sample.to_numpy()
-         X = np.asarray(X).astype(np.float32)
-         input_sample = np.asarray(input_sample).astype(np.float32)
-         y = np.asarray(y).astype(np.float32)
-
-         input_shape_parm = X.shape[1]
-         num_classes = y.shape[1]
-         global build_model
-         build_model = partial(
-             build_model, input_shape_parm=input_shape_parm, num_classes=num_classes
-         )
-
-         # Create the AutoKeras model
-         tuner = keras_tuner.RandomSearch(
-             hypermodel=build_model,
-             objective=objective,
-             max_trials=max_trials,
-             directory=directory,
-             project_name=project_name,
-             seed=seed,
-         )
-
-         tuner.search(X, y, epochs=epochs, validation_split=validation_split)
-         models = tuner.get_best_models(num_models=2)
-         best_model = models[0]
-         best_model(input_sample)
-
-         # save model
-         best_model.save(filepath, save_format="tf")
-
-         if verbose:
-             tuner.results_summary()
-     else:
-         # Load the best model from the directory
-         best_model = tf.keras.models.load_model(filepath)
-
-     return best_model
-
-
- ########################################################################################
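
Comparing the removed 1.2.24 module with its 1.3.0 replacement earlier in this diff: setup_model now accepts a method argument ("Hyperband" or "RandomSearch") and returns a (model, hyperparameters) tuple instead of the model alone. A hedged migration sketch (df and "label" are placeholder names):

    # likelihood 1.2.24: single return value, RandomSearch only
    # best_model = setup_model(df, target="label", epochs=20)

    # likelihood 1.3.0: unpack the tuple; best_hps is a one-row DataFrame of the
    # winning hyperparameters, indexed by "Value"
    best_model, best_hps = setup_model(df, target="label", epochs=20, method="RandomSearch")
    print(best_hps)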