likelihood 1.3.1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to its public registry. It is provided for informational purposes only.
likelihood/models/deep/autoencoders.py
@@ -19,9 +19,9 @@ from functools import wraps
 
 import keras_tuner
 import tensorflow as tf
-from keras.src.engine.input_layer import InputLayer
 from pandas.core.frame import DataFrame
 from sklearn.manifold import TSNE
+from tensorflow.keras.layers import InputLayer
 from tensorflow.keras.regularizers import l2
 
 from likelihood.tools import OneHotEncoder
@@ -39,53 +39,227 @@ def suppress_warnings(func):
     return wrapper
 
 
+class EarlyStopping:
+    def __init__(self, patience=10, min_delta=0.001):
+        self.patience = patience
+        self.min_delta = min_delta
+        self.best_loss = np.inf
+        self.counter = 0
+        self.stop_training = False
+
+    def __call__(self, current_loss):
+        if self.best_loss - current_loss > self.min_delta:
+            self.best_loss = current_loss
+            self.counter = 0
+        else:
+            self.counter += 1
+
+        if self.counter >= self.patience:
+            self.stop_training = True
+
+
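
A minimal sketch of how this helper is driven from a custom loop (the loss values are made up for illustration):

    stopper = EarlyStopping(patience=2, min_delta=0.01)
    for epoch, loss in enumerate([1.0, 0.5, 0.499, 0.498]):
        stopper(loss)  # improvements smaller than min_delta bump the counter
        if stopper.stop_training:
            print(f"stopping at epoch {epoch}")  # fires at epoch 3
            break
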
+def mse_loss(y_true, y_pred):
+    """
+    Mean squared error loss function.
+
+    Parameters
+    ----------
+    y_true : `tf.Tensor`
+        The true values.
+    y_pred : `tf.Tensor`
+        The predicted values.
+
+    Returns
+    -------
+    `tf.Tensor`
+    """
+    return tf.reduce_mean(tf.square(y_true - y_pred))
+
+
+def kl_loss(mean, log_var):
+    """
+    Kullback-Leibler divergence loss function.
+
+    Parameters
+    ----------
+    mean : `tf.Tensor`
+        The mean of the distribution.
+    log_var : `tf.Tensor`
+        The log variance of the distribution.
+
+    Returns
+    -------
+    `tf.Tensor`
+    """
+    return -0.5 * tf.reduce_mean(1 + log_var - tf.square(mean) - tf.exp(log_var))
+
+
+def vae_loss(y_true, y_pred, mean, log_var):
+    """
+    Variational autoencoder loss function.
+
+    Parameters
+    ----------
+    y_true : `tf.Tensor`
+        The true values.
+    y_pred : `tf.Tensor`
+        The predicted values.
+    mean : `tf.Tensor`
+        The mean of the distribution.
+    log_var : `tf.Tensor`
+        The log variance of the distribution.
+
+    Returns
+    -------
+    `tf.Tensor`
+    """
+    return mse_loss(y_true, y_pred) + kl_loss(mean, log_var)
+
+
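
kl_loss is the familiar closed form -0.5 * (1 + log σ² − μ² − σ²) for the divergence between N(μ, σ²) and a standard normal, averaged rather than summed over elements, and vae_loss is simply the sum of the two terms. A quick sanity check (illustrative values, not part of the package):

    y_true = tf.constant([[0.0, 1.0]])
    y_pred = tf.constant([[0.1, 0.9]])
    mean = tf.zeros_like(y_true)
    log_var = tf.zeros_like(y_true)  # zero mean, unit variance -> KL term is 0
    total = vae_loss(y_true, y_pred, mean, log_var)
    assert abs(float(total) - float(mse_loss(y_true, y_pred))) < 1e-6
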
+def sampling(mean, log_var, epsilon_value=1e-8):
+    """
+    Samples from the distribution.
+
+    Parameters
+    ----------
+    mean : `tf.Tensor`
+        The mean of the distribution.
+    log_var : `tf.Tensor`
+        The log variance of the distribution.
+    epsilon_value : float
+        A small value to avoid numerical instability.
+
+    Returns
+    -------
+    `tf.Tensor`
+    """
+    stddev = tf.exp(0.5 * log_var) + epsilon_value
+    epsilon = tf.random.normal(shape=tf.shape(mean), mean=0.0, stddev=1.0)
+    return mean + stddev * epsilon
+
+
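
This is the standard reparameterization trick, z = mean + stddev * epsilon, which keeps the sample differentiable with respect to mean and log_var. A sketch of that property (toy shapes, eager mode assumed):

    mean = tf.Variable([[0.0, 0.0]])
    log_var = tf.Variable([[0.0, 0.0]])
    with tf.GradientTape() as tape:
        z = sampling(mean, log_var)
        loss = tf.reduce_sum(tf.square(z))
    grads = tape.gradient(loss, [mean, log_var])  # both gradients are non-None
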
+def check_for_nans(tensors, name="Tensor"):
+    for t in tensors:
+        if tf.reduce_any(tf.math.is_nan(t)) or tf.reduce_any(tf.math.is_inf(t)):
+            print(f"Warning: {name} contains NaNs or Infs")
+            return True
+    return False
+
+
+def cal_loss_step(batch, encoder, decoder, vae_mode=False, training=True):
+    """
+    Calculates the loss value on a batch of data.
+
+    Parameters
+    ----------
+    batch : `tf.Tensor`
+        The batch of data.
+    encoder : `tf.keras.Model`
+        The encoder model.
+    decoder : `tf.keras.Model`
+        The decoder model.
+    vae_mode : `bool`
+        Whether to use variational autoencoder mode. Default is False.
+    training : `bool`
+        Whether the model is in training mode. Default is True.
+
+    Returns
+    -------
+    `tf.Tensor`
+        The loss value.
+    """
+    if vae_mode:
+        mean, log_var = encoder(batch, training=training)
+        log_var = tf.clip_by_value(log_var, clip_value_min=1e-8, clip_value_max=tf.float32.max)
+        decoded = decoder(sampling(mean, log_var), training=training)
+        loss = vae_loss(batch, decoded, mean, log_var)
+    else:
+        encoded = encoder(batch, training=training)
+        decoded = decoder(encoded, training=training)
+        loss = mse_loss(batch, decoded)
+
+    return loss
+
+
+@tf.function
+def train_step(batch, encoder, decoder, optimizer, vae_mode=False):
+    """
+    Trains the model on a batch of data.
+
+    Parameters
+    ----------
+    batch : `tf.Tensor`
+        The batch of data.
+    encoder : `tf.keras.Model`
+        The encoder model.
+    decoder : `tf.keras.Model`
+        The decoder model.
+    optimizer : `tf.keras.optimizers.Optimizer`
+        The optimizer to use.
+    vae_mode : `bool`
+        Whether to use variational autoencoder mode. Default is False.
+
+    Returns
+    -------
+    `tf.Tensor`
+        The loss value.
+    """
+    optimizer.build(encoder.trainable_variables + decoder.trainable_variables)
+
+    with tf.GradientTape() as encoder_tape, tf.GradientTape() as decoder_tape:
+        loss = cal_loss_step(batch, encoder, decoder, vae_mode=vae_mode)
+
+    gradients_of_encoder = encoder_tape.gradient(loss, encoder.trainable_variables)
+    gradients_of_decoder = decoder_tape.gradient(loss, decoder.trainable_variables)
+
+    optimizer.apply_gradients(zip(gradients_of_encoder, encoder.trainable_variables))
+    optimizer.apply_gradients(zip(gradients_of_decoder, decoder.trainable_variables))
+
+    return loss
+
+
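
train_encoder_decoder below wraps this function, but it can also be driven directly. A hypothetical standalone loop, where encoder and decoder stand in for already-built Keras models with matching widths:

    optimizer = tf.keras.optimizers.Adam(1e-3)
    dataset = tf.data.Dataset.from_tensor_slices(
        np.random.rand(256, 8).astype("float32")
    ).batch(32)
    for batch in dataset:
        loss = train_step(batch, encoder, decoder, optimizer, vae_mode=False)
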
 @tf.keras.utils.register_keras_serializable(package="Custom", name="AutoClassifier")
 class AutoClassifier(tf.keras.Model):
     """
     An auto-classifier model that automatically determines the best classification strategy based on the input data.
 
-    Attributes:
-    - input_shape_parm: The shape of the input data.
-    - num_classes: The number of classes in the dataset.
-    - units: The number of neurons in each hidden layer.
-    - activation: The type of activation function to use for the neural network layers.
-
-    Methods:
-    __init__(self, input_shape_parm, num_classes, units, activation): Initializes an AutoClassifier instance with the given parameters.
-    build(self, input_shape_parm): Builds the model architecture based on input_shape_parm.
-    call(self, x): Defines the forward pass of the model.
-    get_config(self): Returns the configuration of the model.
-    from_config(cls, config): Recreates an instance of AutoClassifier from its configuration.
-    """
-
-    def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
-        """
-        Initializes an AutoClassifier instance with the given parameters.
+    Parameters
+    ----------
+    input_shape_parm : `int`
+        The shape of the input data.
+    num_classes : `int`
+        The number of classes in the dataset.
+    units : `int`
+        The number of neurons in each hidden layer.
+    activation : `str`
+        The type of activation function to use for the neural network layers.
 
-        Parameters
-        ----------
-        input_shape_parm : `int`
-            The shape of the input data.
-        num_classes : `int`
-            The number of classes in the dataset.
-        units : `int`
-            The number of neurons in each hidden layer.
-        activation : `str`
-            The type of activation function to use for the neural network layers.
+    Keyword Arguments:
+    ----------
+        Additional keyword arguments to pass to the model.
 
-        Keyword Arguments:
-        ----------
-        Additional keyword arguments to pass to the model.
+    classifier_activation : `str`
+        The activation function to use for the classifier layer. Default is "softmax". If the activation function is not a classification function, the model can be used in regression problems.
+    num_layers : `int`
+        The number of hidden layers in the classifier. Default is 1.
+    dropout : `float`
+        The dropout rate to use in the classifier. Default is None.
+    l2_reg : `float`
+        The L2 regularization parameter. Default is 0.0.
+    vae_mode : `bool`
+        Whether to use variational autoencoder mode. Default is False.
+    vae_units : `int`
+        The number of units in the variational autoencoder. Default is 2.
+    """
 
-        classifier_activation : `str`
-            The activation function to use for the classifier layer. Default is "softmax". If the activation function is not a classification function, the model can be used in regression problems.
-        num_layers : `int`
-            The number of hidden layers in the classifier. Default is 1.
-        dropout : `float`
-            The dropout rate to use in the classifier. Default is None.
-        l2_reg : `float`
-            The L2 regularization parameter. Default is 0.0.
-        """
+    def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
         super(AutoClassifier, self).__init__()
         self.input_shape_parm = input_shape_parm
         self.num_classes = num_classes
@@ -99,9 +273,10 @@ class AutoClassifier(tf.keras.Model):
         self.num_layers = kwargs.get("num_layers", 1)
         self.dropout = kwargs.get("dropout", None)
         self.l2_reg = kwargs.get("l2_reg", 0.0)
+        self.vae_mode = kwargs.get("vae_mode", False)
+        self.vae_units = kwargs.get("vae_units", 2)
 
-    def build(self, input_shape):
-        # Encoder with L2 regularization
+    def build_encoder_decoder(self, input_shape):
         self.encoder = (
             tf.keras.Sequential(
                 [
@@ -121,7 +296,6 @@ class AutoClassifier(tf.keras.Model):
             else self.encoder
         )
 
-        # Decoder with L2 regularization
         self.decoder = (
             tf.keras.Sequential(
                 [
@@ -141,6 +315,58 @@ class AutoClassifier(tf.keras.Model):
             else self.decoder
         )
 
+    def build(self, input_shape):
+        if self.vae_mode:
+            inputs = tf.keras.Input(shape=self.input_shape_parm, name="encoder_input")
+            x = tf.keras.layers.Dense(
+                units=self.units,
+                kernel_regularizer=l2(self.l2_reg),
+                kernel_initializer="he_normal",
+            )(inputs)
+            x = tf.keras.layers.BatchNormalization()(x)
+            x = tf.keras.layers.Activation(self.activation)(x)
+            x = tf.keras.layers.Dense(
+                units=int(self.units / 2),
+                kernel_regularizer=l2(self.l2_reg),
+                kernel_initializer="he_normal",
+                name="encoder_hidden",
+            )(x)
+            x = tf.keras.layers.BatchNormalization()(x)
+            x = tf.keras.layers.Activation(self.activation)(x)
+
+            mean = tf.keras.layers.Dense(self.vae_units, name="mean")(x)
+            log_var = tf.keras.layers.Dense(self.vae_units, name="log_var")(x)
+            log_var = tf.keras.layers.Lambda(lambda x: x + 1e-7)(log_var)
+
+            self.encoder = (
+                tf.keras.Model(inputs, [mean, log_var], name="encoder")
+                if not self.encoder
+                else self.encoder
+            )
+            self.decoder = (
+                tf.keras.Sequential(
+                    [
+                        tf.keras.layers.Dense(
+                            units=self.units,
+                            kernel_regularizer=l2(self.l2_reg),
+                        ),
+                        tf.keras.layers.BatchNormalization(),
+                        tf.keras.layers.Activation(self.activation),
+                        tf.keras.layers.Dense(
+                            units=self.input_shape_parm,
+                            kernel_regularizer=l2(self.l2_reg),
+                        ),
+                        tf.keras.layers.BatchNormalization(),
+                        tf.keras.layers.Activation(self.activation),
+                    ]
+                )
+                if not self.decoder
+                else self.decoder
+            )
+
+        else:
+            self.build_encoder_decoder(input_shape)
+
         # Classifier with L2 regularization
         self.classifier = tf.keras.Sequential()
         if self.num_layers > 1:
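
For orientation, constructing the model in both modes might look like this (all hyperparameter values are illustrative):

    clf = AutoClassifier(
        input_shape_parm=16, num_classes=3, units=32, activation="relu",
        num_layers=2, dropout=0.2, l2_reg=1e-4,
    )
    vae_clf = AutoClassifier(
        input_shape_parm=16, num_classes=3, units=32, activation="relu",
        vae_mode=True, vae_units=2,
    )
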
@@ -162,8 +388,75 @@ class AutoClassifier(tf.keras.Model):
             )
         )
 
+    def train_encoder_decoder(
+        self, data, epochs, batch_size, validation_split=0.2, patience=10, **kwargs
+    ):
+        """
+        Trains the encoder and decoder on the input data.
+
+        Parameters
+        ----------
+        data : `tf.data.Dataset`, `np.ndarray`
+            The input data.
+        epochs : `int`
+            The number of epochs to train for.
+        batch_size : `int`
+            The batch size to use.
+        validation_split : `float`
+            The proportion of the dataset to use for validation. Default is 0.2.
+        patience : `int`
+            The number of epochs to wait before early stopping. Default is 10.
+
+        Keyword Arguments:
+        ----------
+            Additional keyword arguments to pass to the model.
+        """
+        verbose = kwargs.get("verbose", True)
+        optimizer = kwargs.get("optimizer", tf.keras.optimizers.Adam())
+        dummy_input = tf.convert_to_tensor(tf.random.normal([1, self.input_shape_parm]))
+        self.build(dummy_input.shape)
+        if not self.vae_mode:
+            dummy_output = self.encoder(dummy_input)
+            self.decoder(dummy_output)
+        else:
+            mean, log_var = self.encoder(dummy_input)
+            dummy_output = sampling(mean, log_var)
+            self.decoder(dummy_output)
+
+        if isinstance(data, np.ndarray):
+            data = tf.data.Dataset.from_tensor_slices(data).batch(batch_size)
+            data = data.map(lambda x: tf.cast(x, tf.float32))
+
+        early_stopping = EarlyStopping(patience=patience)
+        train_batches = data.take(int((1 - validation_split) * len(data)))
+        val_batches = data.skip(int((1 - validation_split) * len(data)))
+        for epoch in range(epochs):
+            for train_batch, val_batch in zip(train_batches, val_batches):
+                loss_train = train_step(
+                    train_batch, self.encoder, self.decoder, optimizer, self.vae_mode
+                )
+                loss_val = cal_loss_step(
+                    val_batch, self.encoder, self.decoder, self.vae_mode, False
+                )
+
+            early_stopping(loss_train)
+
+            if early_stopping.stop_training:
+                print(f"Early stopping triggered at epoch {epoch}.")
+                break
+
+            if epoch % 10 == 0 and verbose:
+                print(
+                    f"Epoch {epoch}: Train Loss: {loss_train:.6f} Validation Loss: {loss_val:.6f}"
+                )
+        self.freeze_encoder_decoder()
+
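
A hypothetical pretraining call (X is an illustrative float32 feature matrix); note that the method freezes the encoder and decoder when it finishes, so only the classifier trains afterwards:

    X = np.random.rand(512, 16).astype("float32")
    model = AutoClassifier(input_shape_parm=16, num_classes=3, units=32, activation="relu")
    model.train_encoder_decoder(X, epochs=100, batch_size=32, validation_split=0.2, patience=5)
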
     def call(self, x):
-        encoded = self.encoder(x)
+        if self.vae_mode:
+            mean, log_var = self.encoder(x)
+            encoded = sampling(mean, log_var)
+        else:
+            encoded = self.encoder(x)
         decoded = self.decoder(encoded)
         combined = tf.concat([decoded, encoded], axis=1)
         classification = self.classifier(combined)
@@ -190,7 +483,7 @@ class AutoClassifier(tf.keras.Model):
     def set_encoder_decoder(self, source_model):
         """
         Sets the encoder and decoder layers from another AutoClassifier instance,
-        ensuring compatibility in dimensions.
+        ensuring compatibility in dimensions. Only works if vae_mode is False.
 
         Parameters:
         -----------
@@ -257,6 +550,8 @@ class AutoClassifier(tf.keras.Model):
             "num_layers": self.num_layers,
             "dropout": self.dropout,
             "l2_reg": self.l2_reg,
+            "vae_mode": self.vae_mode,
+            "vae_units": self.vae_units,
         }
         base_config = super(AutoClassifier, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
@@ -272,6 +567,8 @@ class AutoClassifier(tf.keras.Model):
             num_layers=config["num_layers"],
             dropout=config["dropout"],
             l2_reg=config["l2_reg"],
+            vae_mode=config["vae_mode"],
+            vae_units=config["vae_units"],
         )
 
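
Because the class is registered with @tf.keras.utils.register_keras_serializable and round-trips through get_config/from_config, a trained instance should survive a save/load cycle; a sketch (the file path is illustrative):

    model.save("autoclassifier.keras")
    restored = tf.keras.models.load_model("autoclassifier.keras")
    assert restored.get_config()["vae_mode"] == model.get_config()["vae_mode"]
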
 
@@ -302,6 +599,8 @@ def call_existing_code(
         The shape of the input data.
     num_classes : `int`
         The number of classes in the dataset.
+    num_layers : `int`
+        The number of hidden layers in the classifier. Default is 1.
 
     Returns
     -------
@@ -578,7 +877,10 @@ class GetInsights:
     def __init__(self, model: AutoClassifier, inputs: np.ndarray) -> None:
         self.inputs = inputs
         self.model = model
-        self.encoder_layer = self.model.encoder.layers[0]
+        if isinstance(self.model.encoder.layers[0], InputLayer):
+            self.encoder_layer = self.model.encoder.layers[1]
+        else:
+            self.encoder_layer = self.model.encoder.layers[0]
         self.decoder_layer = self.model.decoder.layers[0]
         self.encoder_weights = self.encoder_layer.get_weights()[0]
         self.decoder_weights = self.decoder_layer.get_weights()[0]
@@ -607,7 +909,12 @@ class GetInsights:
         indexes = np.random.choice(np.arange(inputs.shape[0]), n, replace=False)
         inputs = inputs[indexes]
         inputs[np.isnan(inputs)] = 0.0
-        encoded = self.model.encoder(inputs)
+        # check if self.model.encoder(inputs) has two outputs
+        try:
+            mean, log_var = self.model.encoder(inputs)
+            encoded = sampling(mean, log_var)
+        except Exception:
+            encoded = self.model.encoder(inputs)
         reconstructed = self.model.decoder(encoded)
         combined = tf.concat([reconstructed, encoded], axis=1)
         self.classification = self.model.classifier(combined).numpy().argmax(axis=1)
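
An alternative to the try/except probe, assuming the VAE encoder is the functional tf.keras.Model(inputs, [mean, log_var]) built above, would be to count the encoder's symbolic outputs:

    encoder = model.encoder  # `model` stands for a built AutoClassifier
    if isinstance(encoder, tf.keras.Model) and len(encoder.outputs) == 2:
        mean, log_var = encoder(inputs)
        encoded = sampling(mean, log_var)
    else:
        encoded = encoder(inputs)
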
likelihood-1.3.1.dist-info/METADATA → likelihood-1.4.0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: likelihood
-Version: 1.3.1
+Version: 1.4.0
 Summary: A package that performs the maximum likelihood algorithm.
 Home-page: https://github.com/jzsmoreno/likelihood/
 Author: J. A. Moreno-Guerra
likelihood-1.3.1.dist-info/RECORD → likelihood-1.4.0.dist-info/RECORD
@@ -9,12 +9,12 @@ likelihood/models/regression.py,sha256=9cakyGlJCEO6WfpoKLh3GxdXQeQp7cUvJIkQ5odT0
 likelihood/models/simulation.py,sha256=LFyE_szo7sDukviMLeg_6RoyAaI7yMXUy8f4mDOrGoc,8460
 likelihood/models/utils.py,sha256=dvigPi_hxcs5ntfHr7Y1JvP5ULtMW3kkN0nJpS4orE8,1319
 likelihood/models/deep/__init__.py,sha256=-KIPippVaMqgG8mEgYjNxYQdqOUcFhUuKhbVe8TTCfo,28
-likelihood/models/deep/autoencoders.py,sha256=S11ARmoROTNFC4AZLuTcB-ymbm14NUH-a0Dg861fsYM,28203
+likelihood/models/deep/autoencoders.py,sha256=OKS-Hudn4gxm7ttu8cjJ0PX7RGlmFpN2Xd-FEIvkagU,37866
 likelihood/tools/__init__.py,sha256=MCjsCWfBNKE2uMN0VizDN1uFzZ_md0X2WZeBdWhrCR8,50
 likelihood/tools/numeric_tools.py,sha256=FA44kbiAcxcquz1el_g3Pqsp5ii8XFkAIrsMs5bGkj0,11445
 likelihood/tools/tools.py,sha256=6JLZBHxc4f1lJfw4aBwdS2s16EpydFNqLZF73I7wddQ,44412
-likelihood-1.3.1.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
-likelihood-1.3.1.dist-info/METADATA,sha256=CuHvFiy8Pr1ToXw2oCvqoEsnImRtcifH2Mn7HQnFEkc,2822
-likelihood-1.3.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-likelihood-1.3.1.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
-likelihood-1.3.1.dist-info/RECORD,,
+likelihood-1.4.0.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
+likelihood-1.4.0.dist-info/METADATA,sha256=5rLf_PhvtIGUNcqh6rp1YP7IW27UbxcRKdm87j9-1qU,2822
+likelihood-1.4.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+likelihood-1.4.0.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
+likelihood-1.4.0.dist-info/RECORD,,