likelihood 1.2.17.tar.gz → 1.2.18.tar.gz
This diff shows the content of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only. Removed lines whose content was truncated in the upstream diff view are kept as fragments marked with `…`.
- {likelihood-1.2.17 → likelihood-1.2.18}/PKG-INFO +2 -2
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood/models/deep/autoencoders.py +43 -22
- likelihood-1.2.18/likelihood/models/simulation.py +103 -0
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood/tools/numeric_tools.py +57 -30
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood/tools/tools.py +28 -10
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood.egg-info/PKG-INFO +2 -2
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood.egg-info/requires.txt +1 -1
- {likelihood-1.2.17 → likelihood-1.2.18}/setup.py +1 -1
- likelihood-1.2.17/likelihood/models/simulation.py +0 -91
- {likelihood-1.2.17 → likelihood-1.2.18}/LICENSE +0 -0
- {likelihood-1.2.17 → likelihood-1.2.18}/README.md +0 -0
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood/__init__.py +0 -0
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood/graph/__init__.py +0 -0
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood/graph/graph.py +0 -0
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood/graph/nn.py +0 -0
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood/main.py +0 -0
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood/models/__init__.py +0 -0
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood/models/deep/__init__.py +0 -0
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood/models/regression.py +0 -0
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood/models/utils.py +0 -0
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood/tools/__init__.py +0 -0
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood.egg-info/SOURCES.txt +0 -0
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood.egg-info/dependency_links.txt +0 -0
- {likelihood-1.2.17 → likelihood-1.2.18}/likelihood.egg-info/top_level.txt +0 -0
- {likelihood-1.2.17 → likelihood-1.2.18}/setup.cfg +0 -0
{likelihood-1.2.17 → likelihood-1.2.18}/PKG-INFO:

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: likelihood
-Version: 1.2.17
+Version: 1.2.18
 Summary: A package that performs the maximum likelihood algorithm.
 Home-page: https://github.com/jzsmoreno/likelihood/
 Author: J. A. Moreno-Guerra
@@ -28,7 +28,7 @@ Requires-Dist: corner
 Provides-Extra: full
 Requires-Dist: networkx; extra == "full"
 Requires-Dist: pyvis; extra == "full"
-Requires-Dist: tensorflow; extra == "full"
+Requires-Dist: tensorflow==2.15.0; extra == "full"
 Requires-Dist: keras-tuner; extra == "full"
 Requires-Dist: scikit-learn; extra == "full"
 
```
{likelihood-1.2.17 → likelihood-1.2.18}/likelihood/models/deep/autoencoders.py:

```diff
@@ -1,5 +1,6 @@
 import os
 from functools import partial
+from shutil import rmtree
 
 import keras_tuner
 import numpy as np
@@ -15,26 +16,26 @@ class AutoClassifier(tf.keras.Model):
     An auto-classifier model that automatically determines the best classification strategy based on the input data.
 
     Attributes:
-        - input_shape: The shape of the input data.
+        - input_shape_parm: The shape of the input data.
         - num_classes: The number of classes in the dataset.
         - units: The number of neurons in each hidden layer.
         - activation: The type of activation function to use for the neural network layers.
 
     Methods:
-        __init__(self, input_shape, num_classes, units, activation): Initializes an AutoClassifier instance with the given parameters.
-        build(self, input_shape): Builds the model architecture based on input_shape.
+        __init__(self, input_shape_parm, num_classes, units, activation): Initializes an AutoClassifier instance with the given parameters.
+        build(self, input_shape_parm): Builds the model architecture based on input_shape_parm.
         call(self, x): Defines the forward pass of the model.
         get_config(self): Returns the configuration of the model.
         from_config(cls, config): Recreates an instance of AutoClassifier from its configuration.
     """
 
-    def __init__(self, input_shape, num_classes, units, activation):
+    def __init__(self, input_shape_parm, num_classes, units, activation):
         """
         Initializes an AutoClassifier instance with the given parameters.
 
         Parameters
         ----------
-        input_shape : `int`
+        input_shape_parm : `int`
             The shape of the input data.
         num_classes : `int`
             The number of classes in the dataset.
@@ -44,7 +45,7 @@ class AutoClassifier(tf.keras.Model):
             The type of activation function to use for the neural network layers.
         """
         super(AutoClassifier, self).__init__()
-        self.input_shape = input_shape
+        self.input_shape_parm = input_shape_parm
         self.num_classes = num_classes
         self.units = units
         self.activation = activation
@@ -53,7 +54,7 @@ class AutoClassifier(tf.keras.Model):
         self.decoder = None
         self.classifier = None
 
-    def build(self, input_shape):
+    def build(self, input_shape_parm):
         self.encoder = tf.keras.Sequential(
             [
                 tf.keras.layers.Dense(units=self.units, activation=self.activation),
@@ -64,7 +65,7 @@ class AutoClassifier(tf.keras.Model):
         self.decoder = tf.keras.Sequential(
             [
                 tf.keras.layers.Dense(units=self.units, activation=self.activation),
-                tf.keras.layers.Dense(units=self.input_shape, activation=self.activation),
+                tf.keras.layers.Dense(units=self.input_shape_parm, activation=self.activation),
             ]
         )
 
@@ -81,7 +82,7 @@ class AutoClassifier(tf.keras.Model):
 
     def get_config(self):
         config = {
-            "input_shape": self.input_shape,
+            "input_shape_parm": self.input_shape_parm,
             "num_classes": self.num_classes,
             "units": self.units,
             "activation": self.activation,
@@ -92,7 +93,7 @@ class AutoClassifier(tf.keras.Model):
     @classmethod
     def from_config(cls, config):
         return cls(
-            input_shape=config["input_shape"],
+            input_shape_parm=config["input_shape_parm"],
             num_classes=config["num_classes"],
             units=config["units"],
             activation=config["activation"],
@@ -104,7 +105,7 @@ def call_existing_code(
     activation: str,
     threshold: float,
     optimizer: str,
-    input_shape: None | int = None,
+    input_shape_parm: None | int = None,
     num_classes: None | int = None,
 ) -> AutoClassifier:
     """
@@ -120,7 +121,7 @@ def call_existing_code(
         The threshold for the classifier.
     optimizer : `str`
         The type of optimizer to use for the neural network layers.
-    input_shape : `None` | `int`
+    input_shape_parm : `None` | `int`
         The shape of the input data.
     num_classes : `int`
         The number of classes in the dataset.
@@ -131,7 +132,10 @@ def call_existing_code(
         The AutoClassifier instance.
     """
     model = AutoClassifier(
-        input_shape=input_shape, num_classes=num_classes, units=units, activation=activation
+        input_shape_parm=input_shape_parm,
+        num_classes=num_classes,
+        units=units,
+        activation=activation,
     )
     model.compile(
         optimizer=optimizer,
@@ -141,14 +145,14 @@ def call_existing_code(
     return model
 
 
-def build_model(hp, input_shape: None | int, num_classes: None | int) -> AutoClassifier:
+def build_model(hp, input_shape_parm: None | int, num_classes: None | int) -> AutoClassifier:
     """Builds a neural network model using Keras Tuner's search algorithm.
 
     Parameters
     ----------
     hp : `keras_tuner.HyperParameters`
         The hyperparameters to tune.
-    input_shape : `None` | `int`
+    input_shape_parm : `None` | `int`
         The shape of the input data.
     num_classes : `int`
         The number of classes in the dataset.
@@ -158,7 +162,9 @@ def build_model(hp, input_shape: None | int, num_classes: None | int) -> AutoClassifier:
     `keras.Model`
         The neural network model.
     """
-    units = hp.Int("units", min_value=int(input_shape * 0.2), max_value=input_shape, step=2)
+    units = hp.Int(
+        "units", min_value=int(input_shape_parm * 0.2), max_value=input_shape_parm, step=2
+    )
     activation = hp.Choice("activation", ["sigmoid", "relu", "tanh", "selu", "softplus"])
     optimizer = hp.Choice("optimizer", ["sgd", "adam", "adadelta"])
     threshold = hp.Float("threshold", min_value=0.1, max_value=0.9, sampling="log")
@@ -168,7 +174,7 @@ def build_model(hp, input_shape: None | int, num_classes: None | int) -> AutoClassifier:
         activation=activation,
         threshold=threshold,
         optimizer=optimizer,
-        input_shape=input_shape,
+        input_shape_parm=input_shape_parm,
         num_classes=num_classes,
     )
     return model
@@ -180,8 +186,9 @@ def setup_model(
     epochs: int,
     train_size: float = 0.7,
     seed=None,
+    train_mode: bool = True,
     filepath: str = "./my_dir/best_model.keras",
-    **kwargs
+    **kwargs,
 ) -> AutoClassifier:
     """Setup model for training and tuning.
 
@@ -197,6 +204,8 @@ def setup_model(
         The proportion of the dataset to use for training.
     seed : `Any` | `int`
         The random seed to use for reproducibility.
+    train_mode : `bool`
+        Whether to train the model or not.
     filepath : `str`
         The path to save the best model to.
 
@@ -234,8 +243,18 @@ def setup_model(
     ), "Categorical variables within the DataFrame must be encoded, this is done by using the DataFrameEncoder from likelihood."
     validation_split = 1.0 - train_size
     # Create my_dir path if it does not exist
-    if not os.path.exists(directory):
-        os.makedirs(directory)
+
+    if train_mode:
+        # Create a new directory if it does not exist
+        try:
+            if not os.path.exists(directory):
+                os.makedirs(directory)
+            else:
+                print(f"Directory {directory} already exists, it will be deleted.")
+                rmtree(directory)
+                os.makedirs(directory)
+        except:
+            print("Warning: unable to create directory")
 
     # Create a Classifier instance
     y_encoder = OneHotEncoder()
@@ -245,10 +264,12 @@ def setup_model(
 
     y = np.asarray(y).astype(np.float32)
 
-    input_shape = X.shape[1]
+    input_shape_parm = X.shape[1]
     num_classes = y.shape[1]
     global build_model
-    build_model = partial(build_model, input_shape=input_shape, num_classes=num_classes)
+    build_model = partial(
+        build_model, input_shape_parm=input_shape_parm, num_classes=num_classes
+    )
 
     # Create the AutoKeras model
     tuner = keras_tuner.RandomSearch(
```
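Why the rename matters: `tf.keras.Model` already exposes `input_shape` as a read-only property, so a subclassed model that assigns `self.input_shape` collides with the base class, and `get_config()`/`from_config()` must agree on one key for a saved model to load back. A minimal round-trip sketch; the import path follows the file list above, the data is made up, and the loss choice plus the encoder/decoder/classifier forward pass are assumed from the hunks shown:

```python
import numpy as np

from likelihood.models.deep.autoencoders import AutoClassifier

# Small model; input_shape_parm is the flattened feature count.
model = AutoClassifier(input_shape_parm=10, num_classes=3, units=8, activation="relu")
model.compile(optimizer="adam", loss="categorical_crossentropy")  # illustrative loss

x = np.random.rand(4, 10).astype("float32")
_ = model(x)  # first call triggers build()

# The config round-trip now works because both methods use the same
# "input_shape_parm" key.
clone = AutoClassifier.from_config(model.get_config())
```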
likelihood-1.2.18/likelihood/models/simulation.py (new file):

```diff
@@ -0,0 +1,103 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from numpy import ndarray
+from pandas.core.frame import DataFrame
+
+from likelihood.tools import DataScaler, FeatureSelection, OneHotEncoder, check_nan_inf
+
+# --------------------------------------------------------------------------------------------------------------------------------------
+
+
+class SimulationEngine(FeatureSelection):
+    """
+    This class implements a predictive model that utilizes multiple linear regression for numerical target variables
+    and multiple logistic regression for categorical target variables.
+
+    The class provides methods for training the model on a given dataset, making predictions,
+    and evaluating the model's performance.
+
+    Key features:
+    - Supports both numerical and categorical target variables, automatically selecting the appropriate regression method.
+    - Includes methods for data preprocessing, model fitting, prediction, and evaluation metrics.
+    - Designed to be flexible and user-friendly, allowing for easy integration with various datasets.
+
+    Usage:
+    - Instantiate the class with the training data and target variable.
+    - Call the fit method to train the model.
+    - Use the predict method to generate predictions on new data.
+    - Evaluate the model using built-in metrics for accuracy and error.
+
+    This class is suitable for applications in data analysis and machine learning, enabling users to leverage regression techniques
+    for both numerical and categorical outcomes efficiently.
+    """
+
+    def __init__(self, df: DataFrame, n_importances: int, use_scaler: bool = False, **kwargs):
+
+        self.df = df
+        self.n_importances = n_importances
+        self.use_scaler = use_scaler
+
+        super().__init__(**kwargs)
+
+    def predict(self, df: DataFrame, column: str) -> ndarray | list:
+        # Let us assign the dictionary entries corresponding to the column
+        w, quick_encoder, names_cols, dfe, numeric_dict = self.w_dict[column]
+
+        df = df[names_cols].copy()
+        # Change the scale of the dataframe
+        dataset = self.df.copy()
+        dataset.drop(columns=column, inplace=True)
+        numeric_df = dataset.select_dtypes(include="number")
+        if self.use_scaler:
+            scaler = DataScaler(numeric_df.copy().to_numpy().T, n=None)
+            _ = scaler.rescale()
+            dataset_ = df.copy()
+            numeric_df = dataset_.select_dtypes(include="number")
+            numeric_scaled = scaler.rescale(dataset_=numeric_df.to_numpy())
+            numeric_df = pd.DataFrame(numeric_scaled.T, columns=numeric_df.columns)
+            for col in numeric_df.columns:
+                df[col] = numeric_df[col].values
+
+        # Encoding the datadrame
+        for num, colname in enumerate(dfe._encode_columns):
+            if df[colname].dtype == "object":
+                encode_dict = dfe.encoding_list[num]
+                df[colname] = df[colname].apply(
+                    dfe._code_transformation_to, dictionary_list=encode_dict
+                )
+
+        # PREDICTION
+        y = df.to_numpy() @ w
+
+        # Categorical column
+        if quick_encoder != None:
+
+            one_hot = OneHotEncoder()
+            y = one_hot.decode(y)
+            encoding_dic = quick_encoder.decoding_list[0]
+            y = [encoding_dic[item] for item in y]
+        # Numeric column
+        else:
+            if self.use_scaler:
+                # scale output
+                y += 1
+                y /= 2
+                y = y * (self.df[column].max() - self.df[column].min())
+
+        return y[:]
+
+    def fit(self, **kwargs) -> None:
+
+        # We run the feature selection algorithm
+        self.get_digraph(self.df, self.n_importances, self.use_scaler)
+
+    def _clean_data(self, df: DataFrame) -> DataFrame:
+
+        df.replace([np.inf, -np.inf], np.nan, inplace=True)
+        df.replace(" ", np.nan, inplace=True)
+        df = check_nan_inf(df)
+        df = df.reset_index()
+        df = df.drop(columns=["index"])
+
+        return df
```
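Compared with the 1.2.17 version (removed at the end of this diff), the new `SimulationEngine` gains a `use_scaler` flag, drops the sampling parameter `n` from `predict`, and rescales numeric predictions by the target column's range instead of the scaler's internal `sigma`. Note that `predict` no longer calls `_clean_data` or samples the input, so callers pass exactly the frame they want scored. A usage sketch under assumed data; the column names and values are illustrative only, and actual behavior depends on the inherited `FeatureSelection` internals:

```python
import pandas as pd

from likelihood.models.simulation import SimulationEngine

df = pd.DataFrame(
    {
        "age": [25, 32, 47, 51, 38, 29],
        "income": [30_000, 45_000, 80_000, 90_000, 52_000, 41_000],
        "segment": ["A", "B", "B", "A", "B", "A"],
    }
)

engine = SimulationEngine(df, n_importances=2, use_scaler=True)
engine.fit()  # delegates to FeatureSelection.get_digraph(df, n_importances, use_scaler)

# Predict one column from the others; categorical targets come back decoded.
predictions = engine.predict(df, column="segment")
```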
{likelihood-1.2.17 → likelihood-1.2.18}/likelihood/tools/numeric_tools.py:

```diff
@@ -1,14 +1,14 @@
 from typing import Dict
 
 import numpy as np
+import pandas as pd
 from numpy import arange, array, ndarray, random
 from numpy.linalg import solve
 from pandas.core.frame import DataFrame
 
-# -------------------------------------------------------------------------
-
 
-def xi_corr(df: DataFrame) -> DataFrame:
+# -------------------------------------------------------------------------
+def xi_corr(df: pd.DataFrame) -> pd.DataFrame:
     """Calculate new coefficient of correlation for all pairs of columns in a `DataFrame`.
 
     Parameters
@@ -19,11 +19,15 @@ def xi_corr(df: DataFrame) -> DataFrame:
     Returns
     -------
     `DataFrame`
-        A dataframe with variable names as …
-        correlation coefficients …
+        A square dataframe with variable names as both index and columns,
+        containing their corresponding correlation coefficients.
     """
-
-    columns = df.columns
+
+    columns = df.select_dtypes(include="number").columns
+    n = len(columns)
+
+    # Initialize a square matrix for the correlations
+    correlations = pd.DataFrame(1.0, index=columns, columns=columns)
 
     for i, col1 in enumerate(columns):
         for j, col2 in enumerate(columns):
@@ -32,9 +36,9 @@ def xi_corr(df: DataFrame) -> DataFrame:
             y = df[col2].values
 
             correlation = xicor(x, y)
-            correlations[…
-            …
-            …
+            correlations.loc[col1, col2] = round(correlation, 8)
+            correlations.loc[col2, col1] = round(correlation, 8)  # Mirror the correlation
+
     return correlations
 
 
@@ -51,10 +55,11 @@ def xi_corr(df: DataFrame) -> DataFrame:
     """
 
 
-def xicor(X: ndarray, Y: ndarray, ties: bool = True) -> float:
-    """
+def xicor(X: np.ndarray, Y: np.ndarray, ties: bool = True, random_seed: int = None) -> float:
+    """
+    Calculate a generalized coefficient of correlation between two variables.
 
-    …
+    This coefficient is an extension of Pearson's correlation, accounting for ties with optional randomization.
 
     Parameters
     ----------
@@ -62,30 +67,52 @@ def xicor(X: ndarray, Y: ndarray, ties: bool = True) -> float:
         The first variable to be correlated. Must have at least one dimension.
     Y : `np.ndarray`
         The second variable to be correlated. Must have at least one dimension.
+    ties : bool
+        Whether to handle ties using randomization.
+    random_seed : int, optional
+        Seed for the random number generator for reproducibility.
 
     Returns
     -------
     xi : `float`
         The estimated value of the new coefficient of correlation.
     """
-
+
+    # Early return for identical arrays
+    if np.array_equal(X, Y):
+        return 1.0
+
     n = len(X)
-
+
+    # Early return for cases with less than 2 elements
+    if n < 2:
+        return 0.0
+
+    # Flatten the input arrays if they are multidimensional
+    X = X.flatten()
+    Y = Y.flatten()
+
+    # Get the sorted order of X
+    order = np.argsort(X)
+
     if ties:
-        … (11 removed lines of the previous implementation, truncated in the upstream diff view)
+        np.random.seed(random_seed)  # Set seed for reproducibility if needed
+        ranks = np.argsort(np.argsort(Y[order]))  # Get ranks
+        unique_ranks, counts = np.unique(ranks, return_counts=True)
+
+        # Adjust ranks for ties by shuffling
+        for rank, count in zip(unique_ranks, counts):
+            if count > 1:
+                tie_indices = np.where(ranks == rank)[0]
+                np.random.shuffle(ranks[tie_indices])  # Randomize ties
+
+        cumulative_counts = np.array([np.sum(y >= Y[order]) for y in Y[order]])
+        return 1 - n * np.sum(np.abs(ranks[1:] - ranks[: n - 1])) / (
+            2 * np.sum(cumulative_counts * (n - cumulative_counts))
+        )
     else:
-        …
-        return 1 - 3 * sum(abs( …
+        ranks = np.argsort(np.argsort(Y[order]))  # Get ranks without randomization
+        return 1 - 3 * np.sum(np.abs(ranks[1:] - ranks[: n - 1])) / (n**2 - 1)
 
 
 # -------------------------------------------------------------------------
@@ -257,8 +284,8 @@ if __name__ == "__main__":
     print("New correlation coefficient test")
     X = np.random.rand(100, 1)
     Y = X * X
-    print("coefficient for Y = X * X : ", xicor(X, Y))
-
+    print("coefficient for Y = X * X : ", xicor(X, Y, False))
+    df["index"] = ["A", "B", "C", "D"]
     print("New correlation coefficient test for pandas DataFrame")
     values_df = xi_corr(df)
     breakpoint()
```
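For reference, the `ties=False` branch computes the rank statistic `xi = 1 - 3 * sum(|r(i+1) - r(i)|) / (n**2 - 1)`, where `r(i)` is the rank of Y among the sample ordered by X (this matches Chatterjee's "new coefficient of correlation", which the docstring echoes). It approaches 1 when Y is a possibly nonlinear function of X, approaches 0 under independence, and is deliberately asymmetric in its arguments. A small sketch, assuming the functions are importable from `likelihood.tools.numeric_tools` per the file path above:

```python
import numpy as np

from likelihood.tools.numeric_tools import xicor

rng = np.random.default_rng(0)
x = rng.random(1000)

# y is a deterministic function of x: xi should be close to 1
# (about 1 - 3/(n + 1) for a monotone relationship).
print(xicor(x, x**2, ties=False))

# Independent noise: xi should be close to 0.
print(xicor(x, rng.random(1000), ties=False))

# Asymmetry: xicor(y, x) generally differs from xicor(x, y),
# and identical arrays short-circuit to 1.0.
```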
{likelihood-1.2.17 → likelihood-1.2.18}/likelihood/tools/tools.py:

```diff
@@ -640,14 +640,14 @@ def cal_average(y: ndarray, alpha: float = 1):
 class DataScaler:
     """numpy array `scaler` and `rescaler`"""
 
-    __slots__ = ["dataset_", "_n", "data_scaled", "values", "transpose"]
+    __slots__ = ["dataset_", "_n", "data_scaled", "values", "transpose", "inv_fitting"]
 
     def __init__(self, dataset: ndarray, n: int = 1) -> None:
         """Initializes the parameters required for scaling the data"""
         self.dataset_ = dataset.copy()
         self._n = n
 
-    def rescale(self) -> ndarray:
+    def rescale(self, dataset_: ndarray | None = None) -> ndarray:
         """Perform a standard rescaling of the data
 
         Returns
@@ -655,11 +655,26 @@ class DataScaler:
         data_scaled : `np.array`
             An array containing the scaled data.
         """
+        if isinstance(dataset_, ndarray):
+            data_scaled = np.copy(dataset_)
+            mu = self.values[0]
+            sigma = self.values[1]
+            f = self.values[2]
+            data_scaled = data_scaled.reshape((self.dataset_.shape[0], -1))
+            for i in range(self.dataset_.shape[0]):
+                if self._n != None:
+                    poly = f[i](self.inv_fitting[i](data_scaled[i]))
+                    data_scaled[i] += -poly
+                data_scaled[i] = 2 * ((data_scaled[i] - mu[i]) / sigma[i]) - 1
+            return data_scaled
+        else:
+            self.data_scaled = np.copy(self.dataset_.copy())
 
         mu = []
         sigma = []
         fitting = []
-        self.data_scaled = np.copy(self.dataset_.copy())
+        self.inv_fitting = []
+
         try:
             xaxis = range(self.dataset_.shape[1])
         except:
@@ -675,12 +690,15 @@ class DataScaler:
         for i in range(self.dataset_.shape[0]):
             if self._n != None:
                 fit = np.polyfit(xaxis, self.dataset_[i, :], self._n)
+                inv_fit = np.polyfit(self.dataset_[i, :], xaxis, self._n)
                 f = np.poly1d(fit)
                 poly = f(xaxis)
                 fitting.append(f)
+                self.inv_fitting.append(inv_fit)
                 self.data_scaled[i, :] += -poly
             else:
                 fitting.append(0.0)
+                self.inv_fitting.append(0.0)
             mu.append(np.min(self.data_scaled[i, :]))
             if np.max(self.data_scaled[i, :]) != 0:
                 sigma.append(np.max(self.data_scaled[i, :]) - mu[i])
@@ -1064,7 +1082,7 @@ class FeatureSelection:
         self.all_features_imp_graph: List[Tuple] = []
         self.w_dict = dict()
 
-    def get_digraph(self, dataset: DataFrame, n_importances: int) -> str:
+    def get_digraph(self, dataset: DataFrame, n_importances: int, use_scaler: bool = False) -> str:
         """
         Get directed graph showing importance of features.
 
@@ -1092,10 +1110,11 @@ class FeatureSelection:
             feature_string += column + "; "
 
             numeric_df = curr_dataset.select_dtypes(include="number")
-            self.scaler = DataScaler(numeric_df.copy().to_numpy().T, n=None)
-            numeric_scaled = self.scaler.rescale()
-            numeric_df = pd.DataFrame(numeric_scaled.T, columns=numeric_df.columns)
-            curr_dataset[numeric_df.columns] = numeric_df
+            if use_scaler:
+                self.scaler = DataScaler(numeric_df.copy().to_numpy().T, n=None)
+                numeric_scaled = self.scaler.rescale()
+                numeric_df = pd.DataFrame(numeric_scaled.T, columns=numeric_df.columns)
+                curr_dataset[numeric_df.columns] = numeric_df
 
             # We construct dictionary to save index for scaling
             numeric_dict = dict(zip(list(numeric_df.columns), range(len(list(numeric_df.columns)))))
@@ -1119,7 +1138,6 @@ class FeatureSelection:
             dfe = DataFrameEncoder(X_aux)
             encoded_df = dfe.encode(save_mode=False)
             # We train
-
             Model.fit(encoded_df.to_numpy().T, Y.to_numpy().T)
             # We obtain importance
             importance = Model.get_importances()
@@ -1202,7 +1220,7 @@
 
 
 def check_nan_inf(df: DataFrame) -> DataFrame:
-    """…
+    """Checks for `NaN` and `Inf` values in the `DataFrame`. If any are found they will be removed."""
     nan_values = df.isnull().values.any()
     count = np.isinf(df.select_dtypes(include="number")).values.sum()
     print("There are null values : ", nan_values)
```
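The `DataScaler` change is what enables `SimulationEngine.predict` above: calling `rescale()` with no argument fits and scales the stored dataset, while `rescale(dataset_=...)` reapplies the stored `mu`/`sigma` (and, when a polynomial degree was set, the fitted detrending) to new data. A sketch, assuming `DataScaler` is importable from `likelihood.tools` as in the new simulation.py's imports, that `self.values` holds `(mu, sigma, fitting)` as the new branch implies, and following the row-per-variable convention used throughout the diff:

```python
import numpy as np

from likelihood.tools import DataScaler

rng = np.random.default_rng(1)
train = rng.random((3, 50))  # 3 variables, 50 observations each

scaler = DataScaler(train, n=None)  # n=None skips the polynomial detrending branch
train_scaled = scaler.rescale()     # fits per-row mu/sigma, scales toward [-1, 1]

new = rng.random((3, 5))
new_scaled = scaler.rescale(dataset_=new)  # reuses the stored mu/sigma on new data
```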
{likelihood-1.2.17 → likelihood-1.2.18}/likelihood.egg-info/PKG-INFO (same change as the top-level PKG-INFO):

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: likelihood
-Version: 1.2.17
+Version: 1.2.18
 Summary: A package that performs the maximum likelihood algorithm.
 Home-page: https://github.com/jzsmoreno/likelihood/
 Author: J. A. Moreno-Guerra
@@ -28,7 +28,7 @@ Requires-Dist: corner
 Provides-Extra: full
 Requires-Dist: networkx; extra == "full"
 Requires-Dist: pyvis; extra == "full"
-Requires-Dist: tensorflow; extra == "full"
+Requires-Dist: tensorflow==2.15.0; extra == "full"
 Requires-Dist: keras-tuner; extra == "full"
 Requires-Dist: scikit-learn; extra == "full"
 
```
{likelihood-1.2.17 → likelihood-1.2.18}/setup.py:

```diff
@@ -31,7 +31,7 @@ setuptools.setup(
     packages=setuptools.find_packages(),
     install_requires=install_requires,
     extras_require={
-        "full": ["networkx", "pyvis", "tensorflow", "keras-tuner", "scikit-learn"],
+        "full": ["networkx", "pyvis", "tensorflow==2.15.0", "keras-tuner", "scikit-learn"],
     },
     classifiers=[
         "Programming Language :: Python :: 3",
```
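Both copies of the package metadata and `setup.py` now pin the `full` extra to `tensorflow==2.15.0`; the `likelihood.egg-info/requires.txt` change listed above (+1 −1) is presumably the same pin, though its hunk is not shown in this view. The pin is likely motivated by API stability: TensorFlow 2.16+ ships Keras 3 by default, which changes how subclassed models such as `AutoClassifier` are saved and loaded. An illustrative check:

```python
# Illustrative sanity check: after `pip install "likelihood[full]"`,
# the pinned resolution should be TensorFlow 2.15.0 exactly.
import tensorflow as tf

assert tf.__version__ == "2.15.0", f"unexpected TensorFlow version: {tf.__version__}"
```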
likelihood-1.2.17/likelihood/models/simulation.py (removed):

```diff
@@ -1,91 +0,0 @@
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-from numpy import ndarray
-from pandas.core.frame import DataFrame
-
-from likelihood.tools import DataScaler, FeatureSelection, OneHotEncoder, check_nan_inf
-
-# --------------------------------------------------------------------------------------------------------------------------------------
-
-
-class SimulationEngine(FeatureSelection):
-
-    def __init__(self, df: DataFrame, n_importances: int, **kwargs):
-
-        self.df = df
-        self.n_importances = n_importances
-
-        super().__init__(**kwargs)
-
-    def predict(self, df: DataFrame, column: str, n: int = None) -> ndarray | list:
-
-        # We clean the data set
-        df = self._clean_data(df)
-
-        # Let us assign the dictionary entries corresponding to the column
-        w, quick_encoder, names_cols, dfe, numeric_dict = self.w_dict[column]
-
-        try:
-            df = df[names_cols].copy()
-            # Change the scale of the dataframe
-            numeric_df = df.select_dtypes(include="number")
-            scaler = DataScaler(numeric_df.copy().to_numpy().T, n=None)
-            numeric_scaled = scaler.rescale()
-            numeric_df = pd.DataFrame(numeric_scaled.T, columns=numeric_df.columns)
-            df[numeric_df.columns] = numeric_df
-
-            # Encoding the datadrame
-            for num, colname in enumerate(dfe._encode_columns):
-                if df[colname].dtype == "object":
-                    encode_dict = dfe.encoding_list[num]
-                    df[colname] = df[colname].apply(
-                        dfe._code_transformation_to, dictionary_list=encode_dict
-                    )
-
-        except:
-            print("The dataframe provided does not have the same columns as in the fit method.")
-
-        # Assign value to n if n is None
-        n = n if n != None else len(df)
-
-        # Generation of assertion
-        assert n > 0 and n <= len(df), '"n" must be interger or "<= len(df)".'
-
-        # Sample dataframe
-        df_aux = df.sample(n)
-
-        # PREDICTION
-        y = df_aux.to_numpy() @ w
-
-        # Categorical column
-        if quick_encoder != None:
-
-            one_hot = OneHotEncoder()
-            y = one_hot.decode(y)
-            encoding_dic = quick_encoder.decoding_list[0]
-            y = [encoding_dic[item] for item in y]
-        # Numeric column
-        else:
-            # scale output
-            i = numeric_dict[column]
-            y += 1
-            y /= 2
-            y = y * self.scaler.values[1][i]
-
-        return y
-
-    def fit(self, **kwargs) -> None:
-
-        # We run the feature selection algorithm
-        self.get_digraph(self.df, self.n_importances)
-
-    def _clean_data(self, df: DataFrame) -> DataFrame:
-
-        df.replace([np.inf, -np.inf], np.nan, inplace=True)
-        df.replace(" ", np.nan, inplace=True)
-        df = check_nan_inf(df)
-        df = df.reset_index()
-        df = df.drop(columns=["index"])
-
-        return df
```
All remaining files listed above (+0 -0) are unchanged between 1.2.17 and 1.2.18.