likelihood 1.2.17__py3-none-any.whl → 1.2.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
likelihood/models/deep/autoencoders.py CHANGED
@@ -1,5 +1,6 @@
  import os
  from functools import partial
+ from shutil import rmtree

  import keras_tuner
  import numpy as np
@@ -15,26 +16,26 @@ class AutoClassifier(tf.keras.Model):
  An auto-classifier model that automatically determines the best classification strategy based on the input data.

  Attributes:
- - input_shape: The shape of the input data.
+ - input_shape_parm: The shape of the input data.
  - num_classes: The number of classes in the dataset.
  - units: The number of neurons in each hidden layer.
  - activation: The type of activation function to use for the neural network layers.

  Methods:
- __init__(self, input_shape, num_classes, units, activation): Initializes an AutoClassifier instance with the given parameters.
- build(self, input_shape): Builds the model architecture based on input_shape.
+ __init__(self, input_shape_parm, num_classes, units, activation): Initializes an AutoClassifier instance with the given parameters.
+ build(self, input_shape_parm): Builds the model architecture based on input_shape_parm.
  call(self, x): Defines the forward pass of the model.
  get_config(self): Returns the configuration of the model.
  from_config(cls, config): Recreates an instance of AutoClassifier from its configuration.
  """

- def __init__(self, input_shape, num_classes, units, activation):
+ def __init__(self, input_shape_parm, num_classes, units, activation):
  """
  Initializes an AutoClassifier instance with the given parameters.

  Parameters
  ----------
- input_shape : `int`
+ input_shape_parm : `int`
  The shape of the input data.
  num_classes : `int`
  The number of classes in the dataset.
@@ -44,7 +45,7 @@ class AutoClassifier(tf.keras.Model):
  The type of activation function to use for the neural network layers.
  """
  super(AutoClassifier, self).__init__()
- self.input_shape = input_shape
+ self.input_shape_parm = input_shape_parm
  self.num_classes = num_classes
  self.units = units
  self.activation = activation
@@ -53,7 +54,7 @@ class AutoClassifier(tf.keras.Model):
  self.decoder = None
  self.classifier = None

- def build(self, input_shape):
+ def build(self, input_shape_parm):
  self.encoder = tf.keras.Sequential(
  [
  tf.keras.layers.Dense(units=self.units, activation=self.activation),
@@ -64,7 +65,7 @@ class AutoClassifier(tf.keras.Model):
  self.decoder = tf.keras.Sequential(
  [
  tf.keras.layers.Dense(units=self.units, activation=self.activation),
- tf.keras.layers.Dense(units=self.input_shape, activation=self.activation),
+ tf.keras.layers.Dense(units=self.input_shape_parm, activation=self.activation),
  ]
  )

@@ -81,7 +82,7 @@ class AutoClassifier(tf.keras.Model):

  def get_config(self):
  config = {
- "input_shape": self.input_shape,
+ "input_shape_parm": self.input_shape_parm,
  "num_classes": self.num_classes,
  "units": self.units,
  "activation": self.activation,
@@ -92,7 +93,7 @@ class AutoClassifier(tf.keras.Model):
  @classmethod
  def from_config(cls, config):
  return cls(
- input_shape=config["input_shape"],
+ input_shape_parm=config["input_shape_parm"],
  num_classes=config["num_classes"],
  units=config["units"],
  activation=config["activation"],
@@ -104,7 +105,7 @@ def call_existing_code(
  activation: str,
  threshold: float,
  optimizer: str,
- input_shape: None | int = None,
+ input_shape_parm: None | int = None,
  num_classes: None | int = None,
  ) -> AutoClassifier:
  """
@@ -120,7 +121,7 @@ def call_existing_code(
  The threshold for the classifier.
  optimizer : `str`
  The type of optimizer to use for the neural network layers.
- input_shape : `None` | `int`
+ input_shape_parm : `None` | `int`
  The shape of the input data.
  num_classes : `int`
  The number of classes in the dataset.
@@ -131,7 +132,10 @@ def call_existing_code(
  The AutoClassifier instance.
  """
  model = AutoClassifier(
- input_shape=input_shape, num_classes=num_classes, units=units, activation=activation
+ input_shape_parm=input_shape_parm,
+ num_classes=num_classes,
+ units=units,
+ activation=activation,
  )
  model.compile(
  optimizer=optimizer,
@@ -141,14 +145,14 @@ def call_existing_code(
  return model


- def build_model(hp, input_shape: None | int, num_classes: None | int) -> AutoClassifier:
+ def build_model(hp, input_shape_parm: None | int, num_classes: None | int) -> AutoClassifier:
  """Builds a neural network model using Keras Tuner's search algorithm.

  Parameters
  ----------
  hp : `keras_tuner.HyperParameters`
  The hyperparameters to tune.
- input_shape : `None` | `int`
+ input_shape_parm : `None` | `int`
  The shape of the input data.
  num_classes : `int`
  The number of classes in the dataset.
@@ -158,7 +162,9 @@ def build_model(hp, input_shape: None | int, num_classes: None | int) -> AutoCla
  `keras.Model`
  The neural network model.
  """
- units = hp.Int("units", min_value=int(input_shape * 0.2), max_value=input_shape, step=2)
+ units = hp.Int(
+ "units", min_value=int(input_shape_parm * 0.2), max_value=input_shape_parm, step=2
+ )
  activation = hp.Choice("activation", ["sigmoid", "relu", "tanh", "selu", "softplus"])
  optimizer = hp.Choice("optimizer", ["sgd", "adam", "adadelta"])
  threshold = hp.Float("threshold", min_value=0.1, max_value=0.9, sampling="log")
@@ -168,7 +174,7 @@ def build_model(hp, input_shape: None | int, num_classes: None | int) -> AutoCla
  activation=activation,
  threshold=threshold,
  optimizer=optimizer,
- input_shape=input_shape,
+ input_shape_parm=input_shape_parm,
  num_classes=num_classes,
  )
  return model
@@ -180,8 +186,9 @@ def setup_model(
  epochs: int,
  train_size: float = 0.7,
  seed=None,
+ train_mode: bool = True,
  filepath: str = "./my_dir/best_model.keras",
- **kwargs
+ **kwargs,
  ) -> AutoClassifier:
  """Setup model for training and tuning.

@@ -197,6 +204,8 @@ def setup_model(
  The proportion of the dataset to use for training.
  seed : `Any` | `int`
  The random seed to use for reproducibility.
+ train_mode : `bool`
+ Whether to train the model or not.
  filepath : `str`
  The path to save the best model to.

@@ -234,8 +243,18 @@ def setup_model(
  ), "Categorical variables within the DataFrame must be encoded, this is done by using the DataFrameEncoder from likelihood."
  validation_split = 1.0 - train_size
  # Create my_dir path if it does not exist
- if not os.path.exists(directory):
- os.makedirs(directory)
+
+ if train_mode:
+ # Create a new directory if it does not exist
+ try:
+ if not os.path.exists(directory):
+ os.makedirs(directory)
+ else:
+ print(f"Directory {directory} already exists, it will be deleted.")
+ rmtree(directory)
+ os.makedirs(directory)
+ except:
+ print("Warning: unable to create directory")

  # Create a Classifier instance
  y_encoder = OneHotEncoder()
@@ -245,10 +264,12 @@ def setup_model(

  y = np.asarray(y).astype(np.float32)

- input_shape = X.shape[1]
+ input_shape_parm = X.shape[1]
  num_classes = y.shape[1]
  global build_model
- build_model = partial(build_model, input_shape=input_shape, num_classes=num_classes)
+ build_model = partial(
+ build_model, input_shape_parm=input_shape_parm, num_classes=num_classes
+ )

  # Create the AutoKeras model
  tuner = keras_tuner.RandomSearch(
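The main change in this file is the rename of input_shape to input_shape_parm (presumably to avoid clashing with the input_shape property that tf.keras.Model already defines), which also flows through get_config/from_config. A minimal sketch of the new constructor and config round trip, not part of the diff; the feature and class counts are illustrative:

    from likelihood.models.deep.autoencoders import AutoClassifier

    # Hypothetical dimensions: 20 input features, 3 target classes.
    model = AutoClassifier(input_shape_parm=20, num_classes=3, units=8, activation="relu")

    # get_config()/from_config() now round-trip the renamed key.
    restored = AutoClassifier.from_config(model.get_config())
    assert restored.input_shape_parm == 20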
likelihood/models/simulation.py CHANGED
@@ -10,53 +10,65 @@ from likelihood.tools import DataScaler, FeatureSelection, OneHotEncoder, check_


  class SimulationEngine(FeatureSelection):
+ """
+ This class implements a predictive model that utilizes multiple linear regression for numerical target variables
+ and multiple logistic regression for categorical target variables.

- def __init__(self, df: DataFrame, n_importances: int, **kwargs):
+ The class provides methods for training the model on a given dataset, making predictions,
+ and evaluating the model's performance.
+
+ Key features:
+ - Supports both numerical and categorical target variables, automatically selecting the appropriate regression method.
+ - Includes methods for data preprocessing, model fitting, prediction, and evaluation metrics.
+ - Designed to be flexible and user-friendly, allowing for easy integration with various datasets.
+
+ Usage:
+ - Instantiate the class with the training data and target variable.
+ - Call the fit method to train the model.
+ - Use the predict method to generate predictions on new data.
+ - Evaluate the model using built-in metrics for accuracy and error.
+
+ This class is suitable for applications in data analysis and machine learning, enabling users to leverage regression techniques
+ for both numerical and categorical outcomes efficiently.
+ """
+
+ def __init__(self, df: DataFrame, n_importances: int, use_scaler: bool = False, **kwargs):

  self.df = df
  self.n_importances = n_importances
+ self.use_scaler = use_scaler

  super().__init__(**kwargs)

- def predict(self, df: DataFrame, column: str, n: int = None) -> ndarray | list:
-
- # We clean the data set
- df = self._clean_data(df)
-
+ def predict(self, df: DataFrame, column: str) -> ndarray | list:
  # Let us assign the dictionary entries corresponding to the column
  w, quick_encoder, names_cols, dfe, numeric_dict = self.w_dict[column]

- try:
- df = df[names_cols].copy()
- # Change the scale of the dataframe
- numeric_df = df.select_dtypes(include="number")
+ df = df[names_cols].copy()
+ # Change the scale of the dataframe
+ dataset = self.df.copy()
+ dataset.drop(columns=column, inplace=True)
+ numeric_df = dataset.select_dtypes(include="number")
+ if self.use_scaler:
  scaler = DataScaler(numeric_df.copy().to_numpy().T, n=None)
- numeric_scaled = scaler.rescale()
+ _ = scaler.rescale()
+ dataset_ = df.copy()
+ numeric_df = dataset_.select_dtypes(include="number")
+ numeric_scaled = scaler.rescale(dataset_=numeric_df.to_numpy())
  numeric_df = pd.DataFrame(numeric_scaled.T, columns=numeric_df.columns)
- df[numeric_df.columns] = numeric_df
-
- # Encoding the datadrame
- for num, colname in enumerate(dfe._encode_columns):
- if df[colname].dtype == "object":
- encode_dict = dfe.encoding_list[num]
- df[colname] = df[colname].apply(
- dfe._code_transformation_to, dictionary_list=encode_dict
- )
-
- except:
- print("The dataframe provided does not have the same columns as in the fit method.")
-
- # Assign value to n if n is None
- n = n if n != None else len(df)
-
- # Generation of assertion
- assert n > 0 and n <= len(df), '"n" must be interger or "<= len(df)".'
+ for col in numeric_df.columns:
+ df[col] = numeric_df[col].values

- # Sample dataframe
- df_aux = df.sample(n)
+ # Encoding the datadrame
+ for num, colname in enumerate(dfe._encode_columns):
+ if df[colname].dtype == "object":
+ encode_dict = dfe.encoding_list[num]
+ df[colname] = df[colname].apply(
+ dfe._code_transformation_to, dictionary_list=encode_dict
+ )

  # PREDICTION
- y = df_aux.to_numpy() @ w
+ y = df.to_numpy() @ w

  # Categorical column
  if quick_encoder != None:
@@ -67,18 +79,18 @@ class SimulationEngine(FeatureSelection):
  y = [encoding_dic[item] for item in y]
  # Numeric column
  else:
- # scale output
- i = numeric_dict[column]
- y += 1
- y /= 2
- y = y * self.scaler.values[1][i]
+ if self.use_scaler:
+ # scale output
+ y += 1
+ y /= 2
+ y = y * (self.df[column].max() - self.df[column].min())

- return y
+ return y[:]

  def fit(self, **kwargs) -> None:

  # We run the feature selection algorithm
- self.get_digraph(self.df, self.n_importances)
+ self.get_digraph(self.df, self.n_importances, self.use_scaler)

  def _clean_data(self, df: DataFrame) -> DataFrame:

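The rewritten SimulationEngine.predict no longer samples n rows; it scores every row of the supplied frame against the weights learned in fit, and rescaling is now opt-in via the new use_scaler flag. A rough usage sketch, not part of the diff; the column names, values, and n_importances are illustrative, and the frame must already satisfy the class's own encoding assumptions:

    import pandas as pd
    from likelihood.models.simulation import SimulationEngine

    # Toy dataset with purely numeric columns (illustrative only).
    df = pd.DataFrame(
        {
            "x1": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
            "x2": [2.0, 1.0, 4.0, 3.0, 6.0, 5.0],
            "y": [3.1, 2.9, 7.2, 6.8, 11.1, 10.9],
        }
    )

    engine = SimulationEngine(df, n_importances=2, use_scaler=False)
    engine.fit()                             # runs the feature-selection / regression step
    predictions = engine.predict(df, "y")    # one prediction per row of df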
likelihood/tools/numeric_tools.py CHANGED
@@ -1,14 +1,14 @@
  from typing import Dict

  import numpy as np
+ import pandas as pd
  from numpy import arange, array, ndarray, random
  from numpy.linalg import solve
  from pandas.core.frame import DataFrame

- # -------------------------------------------------------------------------
-

- def xi_corr(df: DataFrame) -> DataFrame:
+ # -------------------------------------------------------------------------
+ def xi_corr(df: pd.DataFrame) -> pd.DataFrame:
  """Calculate new coefficient of correlation for all pairs of columns in a `DataFrame`.

  Parameters
@@ -19,11 +19,15 @@ def xi_corr(df: DataFrame) -> DataFrame:
  Returns
  -------
  `DataFrame`
- A dataframe with variable names as keys and their corresponding
- correlation coefficients as values.
+ A square dataframe with variable names as both index and columns,
+ containing their corresponding correlation coefficients.
  """
- correlations = {}
- columns = df.columns
+
+ columns = df.select_dtypes(include="number").columns
+ n = len(columns)
+
+ # Initialize a square matrix for the correlations
+ correlations = pd.DataFrame(1.0, index=columns, columns=columns)

  for i, col1 in enumerate(columns):
  for j, col2 in enumerate(columns):
@@ -32,9 +36,9 @@ def xi_corr(df: DataFrame) -> DataFrame:
  y = df[col2].values

  correlation = xicor(x, y)
- correlations[(col1, col2)] = round(correlation, 8)
- # dictionary to dataframe
- correlations = DataFrame(list(correlations.items()), columns=["Variables", "Xi Correlation"])
+ correlations.loc[col1, col2] = round(correlation, 8)
+ correlations.loc[col2, col1] = round(correlation, 8)  # Mirror the correlation
+
  return correlations


@@ -51,10 +55,11 @@ def xi_corr(df: DataFrame) -> DataFrame:
  """


- def xicor(X: ndarray, Y: ndarray, ties: bool = True) -> float:
- """Calculate a new coefficient of correlation between two variables.
+ def xicor(X: np.ndarray, Y: np.ndarray, ties: bool = True, random_seed: int = None) -> float:
+ """
+ Calculate a generalized coefficient of correlation between two variables.

- The new coefficient of correlation is a generalization of Pearson's correlation.
+ This coefficient is an extension of Pearson's correlation, accounting for ties with optional randomization.

  Parameters
  ----------
@@ -62,30 +67,52 @@ def xicor(X: ndarray, Y: ndarray, ties: bool = True) -> float:
  The first variable to be correlated. Must have at least one dimension.
  Y : `np.ndarray`
  The second variable to be correlated. Must have at least one dimension.
+ ties : bool
+ Whether to handle ties using randomization.
+ random_seed : int, optional
+ Seed for the random number generator for reproducibility.

  Returns
  -------
  xi : `float`
  The estimated value of the new coefficient of correlation.
  """
- random.seed(42)
+
+ # Early return for identical arrays
+ if np.array_equal(X, Y):
+ return 1.0
+
  n = len(X)
- order = array([i[0] for i in sorted(enumerate(X), key=lambda x: x[1])])
+
+ # Early return for cases with less than 2 elements
+ if n < 2:
+ return 0.0
+
+ # Flatten the input arrays if they are multidimensional
+ X = X.flatten()
+ Y = Y.flatten()
+
+ # Get the sorted order of X
+ order = np.argsort(X)
+
  if ties:
- l = array([sum(y >= Y[order]) for y in Y[order]])
- r = l.copy()
- for j in range(n):
- if sum([r[j] == r[i] for i in range(n)]) > 1:
- tie_index = array([r[j] == r[i] for i in range(n)])
- r[tie_index] = random.choice(
- r[tie_index] - arange(0, sum([r[j] == r[i] for i in range(n)])),
- sum(tie_index),
- replace=False,
- )
- return 1 - n * sum(abs(r[1:] - r[: n - 1])) / (2 * sum(l * (n - l)))
+ np.random.seed(random_seed)  # Set seed for reproducibility if needed
+ ranks = np.argsort(np.argsort(Y[order]))  # Get ranks
+ unique_ranks, counts = np.unique(ranks, return_counts=True)
+
+ # Adjust ranks for ties by shuffling
+ for rank, count in zip(unique_ranks, counts):
+ if count > 1:
+ tie_indices = np.where(ranks == rank)[0]
+ np.random.shuffle(ranks[tie_indices])  # Randomize ties
+
+ cumulative_counts = np.array([np.sum(y >= Y[order]) for y in Y[order]])
+ return 1 - n * np.sum(np.abs(ranks[1:] - ranks[: n - 1])) / (
+ 2 * np.sum(cumulative_counts * (n - cumulative_counts))
+ )
  else:
- r = array([sum(y >= Y[order]) for y in Y[order]])
- return 1 - 3 * sum(abs(r[1:] - r[: n - 1])) / (n**2 - 1)
+ ranks = np.argsort(np.argsort(Y[order]))  # Get ranks without randomization
+ return 1 - 3 * np.sum(np.abs(ranks[1:] - ranks[: n - 1])) / (n**2 - 1)


  # -------------------------------------------------------------------------
@@ -257,8 +284,8 @@ if __name__ == "__main__":
  print("New correlation coefficient test")
  X = np.random.rand(100, 1)
  Y = X * X
- print("coefficient for Y = X * X : ", xicor(X, Y))
-
+ print("coefficient for Y = X * X : ", xicor(X, Y, False))
+ df["index"] = ["A", "B", "C", "D"]
  print("New correlation coefficient test for pandas DataFrame")
  values_df = xi_corr(df)
  breakpoint()
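The xicor rewrite replaces the explicit rank loops with np.argsort-based ranks and exposes tie handling and the seed as parameters, while xi_corr now returns a symmetric matrix over the numeric columns only. A short sketch of how the updated functions might be called, not part of the diff; the data are illustrative:

    import numpy as np
    import pandas as pd
    from likelihood.tools.numeric_tools import xi_corr, xicor

    x = np.random.rand(200)
    y = x**2                                  # monotone dependence, xi close to 1
    print(xicor(x, y, ties=False))

    # xi_corr ignores the non-numeric column and returns a square matrix.
    df = pd.DataFrame({"a": x, "b": y, "label": ["p", "q"] * 100})
    print(xi_corr(df))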
likelihood/tools/tools.py CHANGED
@@ -640,14 +640,14 @@ def cal_average(y: ndarray, alpha: float = 1):
  class DataScaler:
  """numpy array `scaler` and `rescaler`"""

- __slots__ = ["dataset_", "_n", "data_scaled", "values", "transpose"]
+ __slots__ = ["dataset_", "_n", "data_scaled", "values", "transpose", "inv_fitting"]

  def __init__(self, dataset: ndarray, n: int = 1) -> None:
  """Initializes the parameters required for scaling the data"""
  self.dataset_ = dataset.copy()
  self._n = n

- def rescale(self) -> ndarray:
+ def rescale(self, dataset_: ndarray | None = None) -> ndarray:
  """Perform a standard rescaling of the data

  Returns
@@ -655,11 +655,26 @@ class DataScaler:
  data_scaled : `np.array`
  An array containing the scaled data.
  """
+ if isinstance(dataset_, ndarray):
+ data_scaled = np.copy(dataset_)
+ mu = self.values[0]
+ sigma = self.values[1]
+ f = self.values[2]
+ data_scaled = data_scaled.reshape((self.dataset_.shape[0], -1))
+ for i in range(self.dataset_.shape[0]):
+ if self._n != None:
+ poly = f[i](self.inv_fitting[i](data_scaled[i]))
+ data_scaled[i] += -poly
+ data_scaled[i] = 2 * ((data_scaled[i] - mu[i]) / sigma[i]) - 1
+ return data_scaled
+ else:
+ self.data_scaled = np.copy(self.dataset_.copy())

  mu = []
  sigma = []
  fitting = []
- self.data_scaled = np.copy(self.dataset_)
+ self.inv_fitting = []
+
  try:
  xaxis = range(self.dataset_.shape[1])
  except:
@@ -675,12 +690,15 @@
  for i in range(self.dataset_.shape[0]):
  if self._n != None:
  fit = np.polyfit(xaxis, self.dataset_[i, :], self._n)
+ inv_fit = np.polyfit(self.dataset_[i, :], xaxis, self._n)
  f = np.poly1d(fit)
  poly = f(xaxis)
  fitting.append(f)
+ self.inv_fitting.append(inv_fit)
  self.data_scaled[i, :] += -poly
  else:
  fitting.append(0.0)
+ self.inv_fitting.append(0.0)
  mu.append(np.min(self.data_scaled[i, :]))
  if np.max(self.data_scaled[i, :]) != 0:
  sigma.append(np.max(self.data_scaled[i, :]) - mu[i])
@@ -1064,7 +1082,7 @@ class FeatureSelection:
  self.all_features_imp_graph: List[Tuple] = []
  self.w_dict = dict()

- def get_digraph(self, dataset: DataFrame, n_importances: int) -> str:
+ def get_digraph(self, dataset: DataFrame, n_importances: int, use_scaler: bool = False) -> str:
  """
  Get directed graph showing importance of features.

@@ -1092,10 +1110,11 @@
  feature_string += column + "; "

  numeric_df = curr_dataset.select_dtypes(include="number")
- self.scaler = DataScaler(numeric_df.copy().to_numpy().T, n=None)
- numeric_scaled = self.scaler.rescale()
- numeric_df = pd.DataFrame(numeric_scaled.T, columns=numeric_df.columns)
- curr_dataset[numeric_df.columns] = numeric_df
+ if use_scaler:
+ self.scaler = DataScaler(numeric_df.copy().to_numpy().T, n=None)
+ numeric_scaled = self.scaler.rescale()
+ numeric_df = pd.DataFrame(numeric_scaled.T, columns=numeric_df.columns)
+ curr_dataset[numeric_df.columns] = numeric_df

  # We construct dictionary to save index for scaling
  numeric_dict = dict(zip(list(numeric_df.columns), range(len(list(numeric_df.columns)))))
@@ -1119,7 +1138,6 @@
  dfe = DataFrameEncoder(X_aux)
  encoded_df = dfe.encode(save_mode=False)
  # We train
-
  Model.fit(encoded_df.to_numpy().T, Y.to_numpy().T)
  # We obtain importance
  importance = Model.get_importances()
@@ -1202,7 +1220,7 @@


  def check_nan_inf(df: DataFrame) -> DataFrame:
- """Check for `NaN` and `Inf` values in the `DataFrame`. If any are found removes them."""
+ """Checks for `NaN` and `Inf` values in the `DataFrame`. If any are found they will be removed."""
  nan_values = df.isnull().values.any()
  count = np.isinf(df.select_dtypes(include="number")).values.sum()
  print("There are null values : ", nan_values)
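DataScaler.rescale can now be fitted once and then re-applied to a new block of data via the optional dataset_ argument, reusing the stored minimum/range statistics (and, when a polynomial order was given, the new inverse fit). A minimal sketch, not part of the diff, assuming the scaler is built with n=None as FeatureSelection.get_digraph does; the shapes are illustrative:

    import numpy as np
    from likelihood.tools import DataScaler

    train = np.random.rand(3, 50)                  # 3 features x 50 samples
    scaler = DataScaler(train.copy(), n=None)
    train_scaled = scaler.rescale()                # fits min/range per feature

    new_block = np.random.rand(3, 10)              # same 3 features, 10 new samples
    new_scaled = scaler.rescale(dataset_=new_block)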
likelihood-1.2.18.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: likelihood
- Version: 1.2.17
+ Version: 1.2.18
  Summary: A package that performs the maximum likelihood algorithm.
  Home-page: https://github.com/jzsmoreno/likelihood/
  Author: J. A. Moreno-Guerra
@@ -28,7 +28,7 @@ Requires-Dist: corner
  Provides-Extra: full
  Requires-Dist: networkx ; extra == 'full'
  Requires-Dist: pyvis ; extra == 'full'
- Requires-Dist: tensorflow ; extra == 'full'
+ Requires-Dist: tensorflow ==2.15.0 ; extra == 'full'
  Requires-Dist: keras-tuner ; extra == 'full'
  Requires-Dist: scikit-learn ; extra == 'full'

likelihood-1.2.18.dist-info/RECORD CHANGED
@@ -5,15 +5,15 @@ likelihood/graph/graph.py,sha256=wKJqgxXiSbnvzyW3SjhQVrqp00yKMHf3ph6CIDNVhNM,289
  likelihood/graph/nn.py,sha256=XqTnAHzXP0jSdLd0IOFjVZUZTcQU-XppsZLmJrG2GMo,12372
  likelihood/models/__init__.py,sha256=e6nB4w47w0Q9DrAFeP3OcUgcoHOtf7Il4mBhgf4AARg,52
  likelihood/models/regression.py,sha256=9cakyGlJCEO6WfpoKLh3GxdXQeQp7cUvJIkQ5odT0TA,9404
- likelihood/models/simulation.py,sha256=KYdVjt2PaLo04g8kBsRGQJ5AKMBaQVUH3orZE_TXTy8,2960
+ likelihood/models/simulation.py,sha256=mdgQPg_LEY5svPaF4TFv-DoQRE2oP2ig_uXnwINtewM,4039
  likelihood/models/utils.py,sha256=VtEj07lV-GRoWraQgpfjU0jTt1Ntf9MXgYwe6XYQh20,1552
  likelihood/models/deep/__init__.py,sha256=-KIPippVaMqgG8mEgYjNxYQdqOUcFhUuKhbVe8TTCfo,28
- likelihood/models/deep/autoencoders.py,sha256=kyqyH_GZuImvCAdvKeQqxGUhfs2-xdt2MzDX4jfDHDY,8953
+ likelihood/models/deep/autoencoders.py,sha256=lUvFQ7lbjvIPR_IKFnK5VCrSa419P5dOaTL3qSHntJk,9623
  likelihood/tools/__init__.py,sha256=MCjsCWfBNKE2uMN0VizDN1uFzZ_md0X2WZeBdWhrCR8,50
- likelihood/tools/numeric_tools.py,sha256=EQD959b56aovi4PI_og0BITgyUONgDUU9LG9YqNgX70,7554
- likelihood/tools/tools.py,sha256=B1_xRZeO2fUSCVUvdkhlB6zO9dGzIglSknydLv7VCEc,41627
- likelihood-1.2.17.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
- likelihood-1.2.17.dist-info/METADATA,sha256=h2BYri6oepaP2iCwAyln149xMS5NZ5pHVhsPChTOyNo,2509
- likelihood-1.2.17.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
- likelihood-1.2.17.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
- likelihood-1.2.17.dist-info/RECORD,,
+ likelihood/tools/numeric_tools.py,sha256=cPTPgdww2ofxfyhJDomqvtXDgsSDs9iRQ7GHLt5Vl6M,8457
+ likelihood/tools/tools.py,sha256=O39aPxTNsaBVSJFIkNsUESNSkfG4C7GG77wcR51a8IQ,42543
+ likelihood-1.2.18.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
+ likelihood-1.2.18.dist-info/METADATA,sha256=8nAjAwwqCDw8K9IBzKG2cgBU5DOLAA-N-RIlr02eyjU,2518
+ likelihood-1.2.18.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+ likelihood-1.2.18.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
+ likelihood-1.2.18.dist-info/RECORD,,
likelihood-1.2.18.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (72.1.0)
+ Generator: setuptools (75.3.0)
  Root-Is-Purelib: true
  Tag: py3-none-any