likelihood 1.2.24__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
likelihood/graph/graph.py CHANGED
@@ -45,8 +45,8 @@ class DynamicGraph(FeatureSelection):
 
     def draw(self, name="graph.html", **kwargs) -> None:
        """Display the network using HTML format"""
-        spring_length = kwargs["spring_length"] if "spring_length" in kwargs else 500
-        node_distance = kwargs["node_distance"] if "node_distance" in kwargs else 100
+        spring_length = kwargs.get("spring_length", 500)
+        node_distance = kwargs.get("node_distance", 100)
        self.G.repulsion(node_distance=node_distance, spring_length=spring_length)
        self.G.show_buttons(filter_=["physics"])
        self.G.show(name)
@@ -89,5 +89,5 @@ if __name__ == "__main__":
     df["y"] = y
     # Instantiate DynamicGraph
     fs = DynamicGraph(df, n_importances=2)
-    print(fs.fit())
+    fs.fit()
     fs.draw()
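The graph.py changes are behavioral as well as stylistic: `fs.fit()` is now run for its side effects rather than printed, and `draw()` resolves its layout options through `dict.get`. A minimal usage sketch, assuming a numeric DataFrame `df` with a target column `y` prepared as in the module's own `__main__` block:

```python
from likelihood.graph.graph import DynamicGraph

fs = DynamicGraph(df, n_importances=2)  # df is assumed prepared as in the __main__ example
fs.fit()  # no longer printed; called for its side effects

# draw() now reads options via kwargs.get, so these two calls are equivalent:
fs.draw()
fs.draw(name="graph.html", spring_length=500, node_distance=100)
```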
likelihood/graph/nn.py CHANGED
@@ -1,9 +1,9 @@
+import logging
 import os
 
-os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
-# Suppress TensorFlow INFO logs
-os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
-import logging
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+logging.getLogger("tensorflow").setLevel(logging.ERROR)
+
 import warnings
 from typing import List, Tuple
 
@@ -17,9 +17,7 @@ from sklearn.model_selection import train_test_split
 
 from likelihood.tools import generate_feature_yaml
 
-logging.getLogger("tensorflow").setLevel(logging.ERROR)
-
-tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+tf.get_logger().setLevel("ERROR")
 
 
 def compare_similarity(arr1: np.ndarray, arr2: np.ndarray) -> int:
@@ -98,7 +96,7 @@ def cal_adjacency_matrix(
 
     assert len(df_categorical) > 0
 
-    similarity = kwargs["similarity"] if "similarity" in kwargs else len(df_categorical.columns) - 1
+    similarity = kwargs.get("similarity", len(df_categorical.columns) - 1)
     assert similarity <= df_categorical.shape[1]
 
     adj_dict = {}
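Both nn.py hunks tighten TensorFlow log suppression, and the ordering matters: `TF_CPP_MIN_LOG_LEVEL` only takes effect if it is set before TensorFlow's C++ backend initializes, i.e. before the first `import tensorflow`. That is why the new code configures the environment first and then uses the modern `tf.get_logger()` API in place of the deprecated `tf.compat.v1.logging`. A standalone sketch of the pattern:

```python
import logging
import os

# Must happen before `import tensorflow`; "3" silences INFO, WARNING, and ERROR C++ logs.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
logging.getLogger("tensorflow").setLevel(logging.ERROR)

import tensorflow as tf

# Post-import: the supported replacement for the removed tf.compat.v1.logging call.
tf.get_logger().setLevel("ERROR")
```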
likelihood/models/deep/autoencoders.py CHANGED
@@ -1,19 +1,40 @@
 import logging
 import os
+import random
 from functools import partial
 from shutil import rmtree
 
-import keras_tuner
+import matplotlib
+import matplotlib.colors as mcolors
+import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+from pandas.plotting import radviz
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+logging.getLogger("tensorflow").setLevel(logging.ERROR)
+
+import warnings
+from functools import wraps
+
+import keras_tuner
 import tensorflow as tf
 from pandas.core.frame import DataFrame
+from sklearn.manifold import TSNE
 
 from likelihood.tools import OneHotEncoder
 
-logging.getLogger("tensorflow").setLevel(logging.ERROR)
+tf.get_logger().setLevel("ERROR")
+
+
+def suppress_warnings(func):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            return func(*args, **kwargs)
 
-tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+    return wrapper
 
 
 @tf.keras.utils.register_keras_serializable(package="Custom", name="AutoClassifier")
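The new `suppress_warnings` decorator (applied to `setup_model` later in this diff) silences Python warnings only for the duration of the wrapped call; `warnings.catch_warnings()` restores the previous filters on exit. A small sketch with a hypothetical function:

```python
import warnings

@suppress_warnings
def noisy_fit():  # hypothetical example function
    warnings.warn("deprecated argument")  # swallowed by the decorator
    return 42

assert noisy_fit() == 42  # runs cleanly; warning filters are restored afterwards
```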
@@ -35,7 +56,7 @@ class AutoClassifier(tf.keras.Model):
     from_config(cls, config): Recreates an instance of AutoClassifier from its configuration.
     """
 
-    def __init__(self, input_shape_parm, num_classes, units, activation):
+    def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
         """
         Initializes an AutoClassifier instance with the given parameters.
 
@@ -49,6 +70,17 @@ class AutoClassifier(tf.keras.Model):
             The number of neurons in each hidden layer.
         activation : `str`
             The type of activation function to use for the neural network layers.
+
+        Keyword Arguments:
+        ----------
+        Additional keyword arguments to pass to the model.
+
+        classifier_activation : `str`
+            The activation function to use for the classifier layer. Default is "softmax". If the activation function is not a classification function, the model can be used in regression problems.
+        num_layers : `int`
+            The number of hidden layers in the classifier. Default is 1.
+        dropout : `float`
+            The dropout rate to use in the classifier. Default is None.
         """
         super(AutoClassifier, self).__init__()
         self.input_shape_parm = input_shape_parm
@@ -59,6 +91,9 @@ class AutoClassifier(tf.keras.Model):
         self.encoder = None
         self.decoder = None
         self.classifier = None
+        self.classifier_activation = kwargs.get("classifier_activation", "softmax")
+        self.num_layers = kwargs.get("num_layers", 1)
+        self.dropout = kwargs.get("dropout", None)
 
     def build(self, input_shape):
         self.encoder = tf.keras.Sequential(
@@ -75,8 +110,16 @@ class AutoClassifier(tf.keras.Model):
             ]
         )
 
-        self.classifier = tf.keras.Sequential(
-            [tf.keras.layers.Dense(self.num_classes, activation="softmax")]
+        self.classifier = tf.keras.Sequential()
+        if self.num_layers > 1:
+            for _ in range(self.num_layers - 1):
+                self.classifier.add(
+                    tf.keras.layers.Dense(units=self.units, activation=self.activation)
+                )
+                if self.dropout:
+                    self.classifier.add(tf.keras.layers.Dropout(self.dropout))
+        self.classifier.add(
+            tf.keras.layers.Dense(units=self.num_classes, activation=self.classifier_activation)
         )
 
     def call(self, x):
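The constructor's new keyword arguments let the classifier head grow beyond the previous single softmax `Dense` layer, as the rewritten `build()` above shows. A minimal construction sketch, mirroring the `__main__` example added at the end of this file; the two hunks that follow persist the same fields through `get_config`/`from_config`:

```python
model = AutoClassifier(
    input_shape_parm=4,
    num_classes=3,
    units=8,
    activation="relu",
    num_layers=2,                     # one hidden classifier layer plus the output layer
    dropout=0.1,                      # Dropout after each hidden classifier layer
    classifier_activation="softmax",  # swap for e.g. "linear" in regression settings
)

# The new fields survive a serialization round trip:
clone = AutoClassifier.from_config(model.get_config())
assert clone.num_layers == 2 and clone.dropout == 0.1
```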
@@ -92,6 +135,9 @@ class AutoClassifier(tf.keras.Model):
             "num_classes": self.num_classes,
             "units": self.units,
             "activation": self.activation,
+            "classifier_activation": self.classifier_activation,
+            "num_layers": self.num_layers,
+            "dropout": self.dropout,
         }
         base_config = super(AutoClassifier, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
@@ -103,6 +149,9 @@ class AutoClassifier(tf.keras.Model):
             num_classes=config["num_classes"],
             units=config["units"],
             activation=config["activation"],
+            classifier_activation=config["classifier_activation"],
+            num_layers=config["num_layers"],
+            dropout=config["dropout"],
         )
 
 
@@ -113,6 +162,8 @@ def call_existing_code(
     optimizer: str,
     input_shape_parm: None | int = None,
     num_classes: None | int = None,
+    num_layers: int = 1,
+    **kwargs,
 ) -> AutoClassifier:
     """
     Calls an existing AutoClassifier instance.
@@ -137,11 +188,14 @@ def call_existing_code(
     `AutoClassifier`
         The AutoClassifier instance.
     """
+    dropout = kwargs.get("dropout", None)
     model = AutoClassifier(
         input_shape_parm=input_shape_parm,
         num_classes=num_classes,
         units=units,
         activation=activation,
+        num_layers=num_layers,
+        dropout=dropout,
     )
     model.compile(
         optimizer=optimizer,
@@ -151,7 +205,9 @@ def call_existing_code(
     return model
 
 
-def build_model(hp, input_shape_parm: None | int, num_classes: None | int) -> AutoClassifier:
+def build_model(
+    hp, input_shape_parm: None | int, num_classes: None | int, **kwargs
+) -> AutoClassifier:
     """Builds a neural network model using Keras Tuner's search algorithm.
 
     Parameters
@@ -163,17 +219,56 @@ def build_model(hp, input_shape_parm: None | int, num_classes: None | int) -> AutoClassifier:
     num_classes : `int`
         The number of classes in the dataset.
 
+    Keyword Arguments:
+    ----------
+    Additional keyword arguments to pass to the model.
+
+    hyperparameters : `dict`
+        The hyperparameters to set.
+
     Returns
     -------
     `keras.Model`
         The neural network model.
     """
-    units = hp.Int(
-        "units", min_value=int(input_shape_parm * 0.2), max_value=input_shape_parm, step=2
+    hyperparameters = kwargs.get("hyperparameters", None)
+    hyperparameters_keys = hyperparameters.keys() if hyperparameters is not None else []
+
+    units = (
+        hp.Int(
+            "units",
+            min_value=int(input_shape_parm * 0.2),
+            max_value=int(input_shape_parm * 1.5),
+            step=2,
+        )
+        if "units" not in hyperparameters_keys
+        else hyperparameters["units"]
+    )
+    activation = (
+        hp.Choice("activation", ["sigmoid", "relu", "tanh", "selu", "softplus", "softsign"])
+        if "activation" not in hyperparameters_keys
+        else hyperparameters["activation"]
+    )
+    optimizer = (
+        hp.Choice("optimizer", ["sgd", "adam", "adadelta", "rmsprop", "adamax", "adagrad"])
+        if "optimizer" not in hyperparameters_keys
+        else hyperparameters["optimizer"]
+    )
+    threshold = (
+        hp.Float("threshold", min_value=0.1, max_value=0.9, sampling="log")
+        if "threshold" not in hyperparameters_keys
+        else hyperparameters["threshold"]
+    )
+    num_layers = (
+        hp.Int("num_layers", min_value=1, max_value=10, step=1)
+        if "num_layers" not in hyperparameters_keys
+        else hyperparameters["num_layers"]
+    )
+    dropout = (
+        hp.Float("dropout", min_value=0.1, max_value=0.9, sampling="log")
+        if "dropout" not in hyperparameters_keys
+        else hyperparameters["dropout"]
     )
-    activation = hp.Choice("activation", ["sigmoid", "relu", "tanh", "selu", "softplus"])
-    optimizer = hp.Choice("optimizer", ["sgd", "adam", "adadelta"])
-    threshold = hp.Float("threshold", min_value=0.1, max_value=0.9, sampling="log")
 
     model = call_existing_code(
         units=units,
@@ -182,10 +277,13 @@ def build_model(hp, input_shape_parm: None | int, num_classes: None | int) -> AutoClassifier:
         optimizer=optimizer,
         input_shape_parm=input_shape_parm,
         num_classes=num_classes,
+        num_layers=num_layers,
+        dropout=dropout,
     )
     return model
 
 
+@suppress_warnings
 def setup_model(
     data: DataFrame,
     target: str,
@@ -194,6 +292,7 @@ def setup_model(
     seed=None,
     train_mode: bool = True,
     filepath: str = "./my_dir/best_model",
+    method: str = "Hyperband",
     **kwargs,
 ) -> AutoClassifier:
     """Setup model for training and tuning.
@@ -214,6 +313,8 @@ def setup_model(
         Whether to train the model or not.
     filepath : `str`
         The path to save the best model to.
+    method : `str`
+        The method to use for hyperparameter tuning. Options are "Hyperband" and "RandomSearch".
 
     Keyword Arguments:
     ----------
@@ -229,30 +330,30 @@ def setup_model(
         The objective to optimize.
     verbose : `bool`
         Whether to print verbose output.
+    hyperparameters : `dict`
+        The hyperparameters to set.
 
     Returns
     -------
     model : `AutoClassifier`
         The trained model.
     """
-    max_trials = kwargs["max_trials"] if "max_trials" in kwargs else 10
-    directory = kwargs["directory"] if "directory" in kwargs else "./my_dir"
-    project_name = kwargs["project_name"] if "project_name" in kwargs else "get_best"
-    objective = kwargs["objective"] if "objective" in kwargs else "val_loss"
-    verbose = kwargs["verbose"] if "verbose" in kwargs else True
+    max_trials = kwargs.get("max_trials", 10)
+    directory = kwargs.get("directory", "./my_dir")
+    project_name = kwargs.get("project_name", "get_best")
+    objective = kwargs.get("objective", "val_loss")
+    verbose = kwargs.get("verbose", True)
+    hyperparameters = kwargs.get("hyperparameters", None)
 
     X = data.drop(columns=target)
     input_sample = X.sample(1)
     y = data[target]
-    # Verify if there are categorical columns in the dataframe
     assert (
         X.select_dtypes(include=["object"]).empty == True
     ), "Categorical variables within the DataFrame must be encoded, this is done by using the DataFrameEncoder from likelihood."
     validation_split = 1.0 - train_size
-    # Create my_dir path if it does not exist
 
     if train_mode:
-        # Create a new directory if it does not exist
         try:
             if (not os.path.exists(directory)) and directory != "./":
                 os.makedirs(directory)
@@ -263,7 +364,6 @@ def setup_model(
         except:
             print("Warning: unable to create directory")
 
-        # Create a Classifier instance
         y_encoder = OneHotEncoder()
         y = y_encoder.encode(y.to_list())
         X = X.to_numpy()
@@ -276,34 +376,242 @@ def setup_model(
         num_classes = y.shape[1]
         global build_model
         build_model = partial(
-            build_model, input_shape_parm=input_shape_parm, num_classes=num_classes
-        )
-
-        # Create the AutoKeras model
-        tuner = keras_tuner.RandomSearch(
-            hypermodel=build_model,
-            objective=objective,
-            max_trials=max_trials,
-            directory=directory,
-            project_name=project_name,
-            seed=seed,
+            build_model,
+            input_shape_parm=input_shape_parm,
+            num_classes=num_classes,
+            hyperparameters=hyperparameters,
         )
 
-        tuner.search(X, y, epochs=epochs, validation_split=validation_split)
+        if method == "Hyperband":
+            tuner = keras_tuner.Hyperband(
+                hypermodel=build_model,
+                objective=objective,
+                max_epochs=epochs,
+                factor=3,
+                directory=directory,
+                project_name=project_name,
+                seed=seed,
+            )
+        elif method == "RandomSearch":
+            tuner = keras_tuner.RandomSearch(
+                hypermodel=build_model,
+                objective=objective,
+                max_trials=max_trials,
+                directory=directory,
+                project_name=project_name,
+                seed=seed,
+            )
+
+        tuner.search(X, y, epochs=epochs, validation_split=validation_split, verbose=verbose)
         models = tuner.get_best_models(num_models=2)
         best_model = models[0]
         best_model(input_sample)
 
-        # save model
         best_model.save(filepath, save_format="tf")
 
         if verbose:
             tuner.results_summary()
     else:
-        # Load the best model from the directory
         best_model = tf.keras.models.load_model(filepath)
 
-    return best_model
+    best_hps = tuner.get_best_hyperparameters(1)[0].values
+    return best_model, pd.DataFrame(best_hps, index=["Value"])
+
+
+class GetInsights:
+    def __init__(self, model: AutoClassifier, inputs: np.ndarray) -> None:
+        self.inputs = inputs
+        self.model = model
+        self.encoder_layer = self.model.encoder.layers[0]
+        self.decoder_layer = self.model.decoder.layers[0]
+        self.encoder_weights = self.encoder_layer.get_weights()[0]
+        self.decoder_weights = self.decoder_layer.get_weights()[0]
+        colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)
+
+        by_hsv = sorted(
+            (tuple(mcolors.rgb_to_hsv(mcolors.to_rgba(color)[:3])), name)
+            for name, color in colors.items()
+        )
+        self.sorted_names = [name for hsv, name in by_hsv if hsv[1] > 0.4 and hsv[2] >= 0.4]
+        random.shuffle(self.sorted_names)
+
+    def predictor_analyzer(
+        self,
+        frac=None,
+        cmap: str = "viridis",
+        aspect: str = "auto",
+        highlight: bool = True,
+        **kwargs,
+    ) -> None:
+        self._viz_weights(cmap=cmap, aspect=aspect, highlight=highlight, **kwargs)
+        inputs = self.inputs.copy()
+        y_labels = kwargs.get("y_labels", None)
+        if frac:
+            n = int(frac * self.inputs.shape[0])
+            indexes = np.random.choice(np.arange(inputs.shape[0]), n, replace=False)
+            inputs = inputs[indexes]
+        inputs[np.isnan(inputs)] = 0.0
+        encoded = self.model.encoder(inputs)
+        reconstructed = self.model.decoder(encoded)
+        combined = tf.concat([reconstructed, encoded], axis=1)
+        self.classification = self.model.classifier(combined).numpy().argmax(axis=1)
+        ax = plt.subplot(1, 2, 1)
+        plt.imshow(self.inputs, cmap=cmap, aspect=aspect)
+        plt.colorbar()
+        plt.title("Original Data")
+        plt.subplot(1, 2, 2, sharex=ax, sharey=ax)
+        plt.imshow(reconstructed, cmap=cmap, aspect=aspect)
+        plt.colorbar()
+        plt.title("Decoder Layer Reconstruction")
+        plt.show()
+
+        self._get_tsne_repr(inputs=inputs, frac=frac)
+        self._viz_tsne_repr(c=self.classification)
+
+        self.data = pd.DataFrame(encoded, columns=[f"Feature {i}" for i in range(encoded.shape[1])])
+        self.data_input = pd.DataFrame(
+            inputs,
+            columns=(
+                [f"Feature {i}" for i in range(inputs.shape[1])] if y_labels is None else y_labels
+            ),
+        )
+        self.data["class"] = self.classification
+        self.data_input["class"] = self.classification
+        radviz(self.data, "class", color=self.colors)
+        plt.title("Radviz Visualization of Latent Space")
+        plt.show()
+
+        radviz(self.data_input, "class", color=self.colors)
+        plt.title("Radviz Visualization of Input Data")
+        plt.show()
+        return self._statistics(self.data_input)
+
+    def _statistics(self, data_input: DataFrame, **kwargs) -> DataFrame:
+        data = data_input.copy(deep=True)
+
+        if not pd.api.types.is_string_dtype(data["class"]):
+            data["class"] = data["class"].astype(str)
+
+        data.ffill(inplace=True)
+        grouped_data = data.groupby("class")
+
+        numerical_stats = grouped_data.agg(["mean", "min", "max", "std", "median"])
+        numerical_stats.columns = ["_".join(col).strip() for col in numerical_stats.columns.values]
+
+        def get_mode(x):
+            mode_series = x.mode()
+            return mode_series.iloc[0] if not mode_series.empty else None
+
+        mode_stats = grouped_data.apply(get_mode, include_groups=False)
+        mode_stats.columns = [f"{col}_mode" for col in mode_stats.columns]
+        combined_stats = pd.concat([numerical_stats, mode_stats], axis=1)
+
+        return combined_stats.T
+
+    def _viz_weights(
+        self, cmap: str = "viridis", aspect: str = "auto", highlight: bool = True, **kwargs
+    ) -> None:
+        title = kwargs.get("title", "Encoder Layer Weights (Dense Layer)")
+        y_labels = kwargs.get("y_labels", None)
+        cmap_highlight = kwargs.get("cmap_highlight", "Pastel1")
+        highlight_mask = np.zeros_like(self.encoder_weights, dtype=bool)
+
+        plt.imshow(self.encoder_weights, cmap=cmap, aspect=aspect)
+        plt.colorbar()
+        plt.title(title)
+        if y_labels is not None:
+            plt.yticks(ticks=np.arange(self.encoder_weights.shape[0]), labels=y_labels)
+        if highlight:
+            for i, j in enumerate(self.encoder_weights.argmax(axis=1)):
+                highlight_mask[i, j] = True
+            plt.imshow(
+                np.ma.masked_where(~highlight_mask, self.encoder_weights),
+                cmap=cmap_highlight,
+                alpha=0.5,
+                aspect=aspect,
+            )
+        plt.show()
+
+    def _get_tsne_repr(self, inputs=None, frac=None) -> None:
+        if inputs is None:
+            inputs = self.inputs.copy()
+            if frac:
+                n = int(frac * self.inputs.shape[0])
+                indexes = np.random.choice(np.arange(inputs.shape[0]), n, replace=False)
+                inputs = inputs[indexes]
+            inputs[np.isnan(inputs)] = 0.0
+        self.latent_representations = inputs @ self.encoder_weights
+
+        tsne = TSNE(n_components=2)
+        self.reduced_data_tsne = tsne.fit_transform(self.latent_representations)
+
+    def _viz_tsne_repr(self, **kwargs) -> None:
+        c = kwargs.get("c", None)
+        self.colors = (
+            kwargs.get("colors", self.sorted_names[: len(np.unique(c))]) if c is not None else None
+        )
+        plt.scatter(
+            self.reduced_data_tsne[:, 0],
+            self.reduced_data_tsne[:, 1],
+            cmap=matplotlib.colors.ListedColormap(self.colors) if c is not None else None,
+            c=c,
+        )
+        if c is not None:
+            cb = plt.colorbar()
+            loc = np.arange(0, max(c), max(c) / float(len(self.colors)))
+            cb.set_ticks(loc)
+            cb.set_ticklabels(np.unique(c))
+        plt.title("t-SNE Visualization of Latent Space")
+        plt.xlabel("t-SNE 1")
+        plt.ylabel("t-SNE 2")
+        plt.show()
 
 
 ########################################################################################
+
+if __name__ == "__main__":
+    # Example usage
+    import pandas as pd
+    from sklearn.datasets import load_iris
+    from sklearn.preprocessing import OneHotEncoder
+
+    # Load the dataset
+    iris = load_iris()
+
+    # Convert to a DataFrame for easy exploration
+    iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
+    iris_df["species"] = iris.target
+
+    X = iris_df.drop(columns="species")
+    y_labels = X.columns
+    X = X.values
+    y = iris_df["species"].values
+
+    X = np.asarray(X).astype(np.float32)
+
+    encoder = OneHotEncoder()
+    y = encoder.fit_transform(y.reshape(-1, 1)).toarray()
+    y = np.asarray(y).astype(np.float32)
+
+    model = AutoClassifier(
+        input_shape_parm=X.shape[1],
+        num_classes=3,
+        units=27,
+        activation="tanh",
+        num_layers=2,
+        dropout=0.2,
+    )
+    model.compile(
+        optimizer="adam",
+        loss=tf.keras.losses.CategoricalCrossentropy(),
+        metrics=[tf.keras.metrics.F1Score(threshold=0.5)],
+    )
+    model.fit(X, y, epochs=50, validation_split=0.2)
+
+    insights = GetInsights(model, X)
+    summary = insights.predictor_analyzer(frac=1.0, y_labels=y_labels)
+    insights._get_tsne_repr()
+    insights._viz_tsne_repr()
+    insights._viz_tsne_repr(c=iris_df["species"])
+    insights._viz_weights()
+    print(summary)
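Two call-site-visible changes land in `setup_model`: it now returns a `(model, DataFrame)` tuple instead of the bare model, and the new `method` parameter selects between the Hyperband tuner and the previous RandomSearch behavior, while a `hyperparameters` dict can pin any search dimension so the tuner only explores the rest. A sketch of the updated contract, assuming a fully numeric DataFrame `data` with target column `"y"` and the default `train_mode=True` (the `tuner` used to build the returned DataFrame only exists on that path):

```python
best_model, best_hps = setup_model(
    data,                    # categoricals must already be encoded
    target="y",
    epochs=20,
    train_size=0.8,
    method="Hyperband",      # or "RandomSearch" for the pre-1.3.0 tuner
    hyperparameters={"activation": "tanh", "num_layers": 2},  # pinned; not searched
)
print(best_hps)  # one-row DataFrame of the winning hyperparameter values
```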
@@ -2,31 +2,25 @@ import pickle
 import warnings
 from typing import List, Tuple, Union
 
-import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 from pandas.core.frame import DataFrame
 
 from likelihood.tools import DataScaler, FeatureSelection, OneHotEncoder, cdf, check_nan_inf
 
-# Suppress RankWarning
 warnings.simplefilter("ignore", np.RankWarning)
 
 
 # --------------------------------------------------------------------------------------------------------------------------------------
 def categories_by_quartile(df: DataFrame, column: str) -> Tuple[str, str]:
-    # Count the frequency of each category in the column
     freq = df[column].value_counts()
 
-    # Calculate the 25th percentile (Q1) and 75th percentile (Q3)
     q1 = freq.quantile(0.25)
     q3 = freq.quantile(0.75)
 
-    # Filter categories that are below the 25th percentile and above the 75th percentile
     least_frequent = freq[freq <= q1]
     most_frequent = freq[freq >= q3]
 
-    # Get the least frequent category (25th percentile) and the most frequent category (75th percentile)
     least_frequent_category = least_frequent.idxmin() if not least_frequent.empty else None
     most_frequent_category = most_frequent.idxmax() if not most_frequent.empty else None
 
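`categories_by_quartile` is unchanged in behavior here (only redundant comments and an unused matplotlib import were dropped): it returns the least and most frequent categories judged against the 25th and 75th percentiles of the column's value counts. A quick sketch:

```python
import pandas as pd

df = pd.DataFrame({"color": ["red"] * 5 + ["blue"] * 3 + ["green"]})
# value counts: red=5, blue=3, green=1; q1=2.0 and q3=4.0 over (1, 3, 5)
least, most = categories_by_quartile(df, "color")
print(least, most)  # -> green red
```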
@@ -1,12 +1,10 @@
-import matplotlib.pyplot as plt
 import numpy as np
-from numpy import ndarray
 
 from likelihood.tools import cal_average
 
 
 class FeaturesArima:
-    def forward(self, y_sum: ndarray, theta: list, mode: bool, noise: float):
+    def forward(self, y_sum: np.ndarray, theta: list, mode: bool, noise: float):
         if mode:
             y_vec = []
 
@@ -31,20 +29,14 @@ class FeaturesArima:
 
         return np.array(y_vec)
 
-    def integrated(self, datapoints: ndarray):
+    def integrated(self, datapoints: np.ndarray):
         datapoints = self.datapoints
-        # n = datapoints.shape[0]
-
-        # y_sum = [
-        #     ((1.0 - datapoints[i - 1] / datapoints[i]) ** self.d) * datapoints[i]
-        #     for i in range(1, n)
-        # ]
         y_sum = list(np.diff(datapoints, self.d))
         y_sum.insert(0, datapoints[0])
 
         return np.array(y_sum)
 
-    def average(self, datapoints: ndarray):
+    def average(self, datapoints: np.ndarray):
         y_sum_average = cal_average(datapoints)
         y_sum_eps = datapoints - y_sum_average
 
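The commented-out manual differencing in `integrated` gives way to `np.diff`, whose second positional argument is the differencing order; re-inserting the first datapoint keeps the output the same length as the input. A small numeric check of that logic:

```python
import numpy as np

datapoints = np.array([1.0, 3.0, 6.0, 10.0])
d = 1                                  # differencing order (self.d in FeaturesArima)
y_sum = list(np.diff(datapoints, d))   # first-order differences: [2.0, 3.0, 4.0]
y_sum.insert(0, datapoints[0])         # prepend the initial value to preserve length
print(np.array(y_sum))                 # [1. 2. 3. 4.]
```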
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: likelihood
-Version: 1.2.24
+Version: 1.3.0
 Summary: A package that performs the maximum likelihood algorithm.
 Home-page: https://github.com/jzsmoreno/likelihood/
 Author: J. A. Moreno-Guerra
@@ -13,7 +13,7 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: black[jupyter]==24.1.1
+Requires-Dist: black[jupyter]>=24.3.0
 Requires-Dist: mypy-extensions==1.0.0
 Requires-Dist: types-openpyxl==3.1.0.15
 Requires-Dist: pydocstyle==6.3.0
@@ -31,6 +31,18 @@ Requires-Dist: pyvis; extra == "full"
 Requires-Dist: tensorflow==2.15.0; extra == "full"
 Requires-Dist: keras-tuner; extra == "full"
 Requires-Dist: scikit-learn; extra == "full"
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: maintainer
+Dynamic: maintainer-email
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
 
 ![likelihood](https://raw.githubusercontent.com/RodolfoFerro/likelihood/main/likelihood.png)
 
@@ -1,20 +1,20 @@
 likelihood/__init__.py,sha256=5C0hapdsk85XZhN_rssRAEFpkRRuKNtj6cyRbqD2_gM,994
 likelihood/main.py,sha256=fcCkGOOWKjfvw2tLVqjuKPV8t0rVCIT9FlbYcOv4EYo,7974
 likelihood/graph/__init__.py,sha256=6TuFDfmXTwpLyHl7_KqBfdzW6zqHjGzIFvymjFPlvjI,21
-likelihood/graph/graph.py,sha256=hGWCznxaRQ8BfY2aLjrvwriZkAIsz5ydKXF4x_7b0EQ,3359
-likelihood/graph/nn.py,sha256=3HihXchK4FQcp0j-pzTO36RSNg7EjzhuXYY_8M3C2G0,12366
+likelihood/graph/graph.py,sha256=bLrNMvIh7GOTdPTwnNss8oPZ7cbSHQScAsH_ttmVUK0,3294
+likelihood/graph/nn.py,sha256=-OvHAeB3l2nd0ZeAk03cVDGBgaTn-WyGIsj5Rq7XeCY,12237
 likelihood/models/__init__.py,sha256=e6nB4w47w0Q9DrAFeP3OcUgcoHOtf7Il4mBhgf4AARg,52
 likelihood/models/hmm.py,sha256=0s0gFySH1u4NjRaZDxiZ8oeTaFhFrw1x0GJxwy3dFrA,6253
 likelihood/models/regression.py,sha256=9cakyGlJCEO6WfpoKLh3GxdXQeQp7cUvJIkQ5odT0TA,9404
-likelihood/models/simulation.py,sha256=L_9Mihcca7i_AnvWWrZilFV8VEhz_Z8fDLepmwBGSi8,8832
-likelihood/models/utils.py,sha256=VtEj07lV-GRoWraQgpfjU0jTt1Ntf9MXgYwe6XYQh20,1552
+likelihood/models/simulation.py,sha256=LFyE_szo7sDukviMLeg_6RoyAaI7yMXUy8f4mDOrGoc,8460
+likelihood/models/utils.py,sha256=dvigPi_hxcs5ntfHr7Y1JvP5ULtMW3kkN0nJpS4orE8,1319
 likelihood/models/deep/__init__.py,sha256=-KIPippVaMqgG8mEgYjNxYQdqOUcFhUuKhbVe8TTCfo,28
-likelihood/models/deep/autoencoders.py,sha256=2P--nS96XwMi44q0OIxvIp6Mdbt-B4LqwCSXTn2jYrY,10070
+likelihood/models/deep/autoencoders.py,sha256=KtEQhYhZcEUALjWuYeTtb2ASurluHcWzKl6c7kS6E78,21135
 likelihood/tools/__init__.py,sha256=MCjsCWfBNKE2uMN0VizDN1uFzZ_md0X2WZeBdWhrCR8,50
 likelihood/tools/numeric_tools.py,sha256=FA44kbiAcxcquz1el_g3Pqsp5ii8XFkAIrsMs5bGkj0,11445
 likelihood/tools/tools.py,sha256=iZBC7IHTFpAyxooyel7ZFi-5-G0nCotNLLtxenPw9T8,44303
-likelihood-1.2.24.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
-likelihood-1.2.24.dist-info/METADATA,sha256=Z6fUcQ3cU1oL8_o6px8uidolXPhlnivmztoZQpvlx8o,2504
-likelihood-1.2.24.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-likelihood-1.2.24.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
-likelihood-1.2.24.dist-info/RECORD,,
+likelihood-1.3.0.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
+likelihood-1.3.0.dist-info/METADATA,sha256=7-V4936jT_W1GHOxbaiBrM7uZhRzHCsxycGGxNq1fR0,2770
+likelihood-1.3.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+likelihood-1.3.0.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
+likelihood-1.3.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.6.0)
+Generator: setuptools (75.8.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 