likelihood 1.3.2-py3-none-any.whl → 1.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
likelihood/graph/nn.py CHANGED
@@ -5,7 +5,7 @@ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
  logging.getLogger("tensorflow").setLevel(logging.ERROR)

  import warnings
- from typing import List, Tuple
+ from typing import Any, List, Tuple

  import numpy as np
  import pandas as pd
@@ -15,48 +15,43 @@ from pandas.core.frame import DataFrame
  from sklearn.metrics import f1_score
  from sklearn.model_selection import train_test_split

- from likelihood.tools import generate_feature_yaml
-
  tf.get_logger().setLevel("ERROR")

+ from likelihood.tools import LoRALayer

- def compare_similarity(arr1: np.ndarray, arr2: np.ndarray) -> int:
-     """Compares the similarity between two arrays of categories.

-     Parameters
-     ----------
-     arr1 : `ndarray`
-         The first array of categories.
-     arr2 : `ndarray`
-         The second array of categories.
+ def compare_similarity(arr1: List[Any], arr2: List[Any], threshold: float = 0.05) -> int:
+     """Calculate the similarity between two arrays considering numeric values near to 1 in ratio."""

-     Returns
-     -------
-     count: `int`
-         The number of categories that are the same in both arrays.
-     """
+     def is_similar(a: Any, b: Any) -> bool:
+         if isinstance(a, (int, float)) and isinstance(b, (int, float)):
+             if a == 0 and b == 0:
+                 return True
+             if a == 0 or b == 0:
+                 return False
+             # For numeric values, check if their ratio is within the threshold range
+             ratio = max(a, b) / min(a, b)
+             return 1 - threshold <= ratio <= 1 + threshold
+         else:
+             return a == b

-     count = 0
-     for i in range(len(arr1)):
-         if arr1[i] == arr2[i]:
-             count += 1
-     return count
+     return sum(is_similar(a, b) for a, b in zip(arr1, arr2))


  def cal_adjacency_matrix(
      df: DataFrame, exclude_subset: List[str] = [], sparse: bool = True, **kwargs
  ) -> Tuple[dict, np.ndarray]:
      """Calculates the adjacency matrix for a given DataFrame.
-     The adjacency matrix is a matrix that represents the similarity between each pair of categories.
+     The adjacency matrix is a matrix that represents the similarity between each pair of features.
      The similarity is calculated using the `compare_similarity` function.
-     The resulting matrix is a square matrix with the same number of rows and columns as the input DataFrame.
+     The resulting matrix is a square matrix with the same number of rows and columns as the rows of the input DataFrame.

      Parameters
      ----------
      df : `DataFrame`
-         The input DataFrame containing the categories.
+         The input DataFrame containing the features.
      exclude_subset : `List[str]`, optional
-         A list of categories to exclude from the calculation of the adjacency matrix.
+         A list of features to exclude from the calculation of the adjacency matrix.
      sparse : `bool`, optional
          Whether to return a sparse matrix or a dense matrix.
      **kwargs : `dict`
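
For orientation, a minimal sketch (not part of the package) of how the reworked comparison behaves, assuming the module is importable as `likelihood.graph.nn`:

    from likelihood.graph.nn import compare_similarity

    # Numeric pairs now count as similar when max/min falls within 1 +/- threshold
    # (default 0.05); everything else still requires exact equality.
    print(compare_similarity([1.00, "a", 10], [1.02, "a", 12]))  # -> 2
    # 1.00 vs 1.02: ratio 1.02, similar; "a" vs "a": equal; 10 vs 12: ratio 1.2, not similar
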
@@ -65,48 +60,33 @@ def cal_adjacency_matrix(
      Keyword Arguments:
      ----------
      similarity: `int`
-         The minimum number of categories that must be the same in both arrays to be considered similar.
+         The minimum number of features that must be the same in both arrays to be considered similar.

      Returns
      -------
      adj_dict : `dict`
-         A dictionary containing the categories.
+         A dictionary containing the features.
      adjacency_matrix : `ndarray`
          The adjacency matrix.
      """

-     yaml_ = generate_feature_yaml(df)
-     categorical_columns = yaml_["categorical_features"]
      if len(exclude_subset) > 0:
-         categorical_columns = [col for col in categorical_columns if col not in exclude_subset]
-
-     if len(categorical_columns) > 1:
-         df_categorical = df[categorical_columns].copy()
+         columns = [col for col in df.columns if col not in exclude_subset]
+         df_ = df[columns].copy()
      else:
-         categorical_columns = [
-             col
-             for col in df.columns
-             if (
-                 col not in exclude_subset
-                 and pd.api.types.is_integer_dtype(df[col])
-                 and len(df[col].unique()) > 2
-             )
-         ]
-         df_categorical = df[categorical_columns].copy()
+         df_ = df.copy()

-     assert len(df_categorical) > 0
+     assert len(df_) > 0

-     similarity = kwargs.get("similarity", len(df_categorical.columns) - 1)
-     assert similarity <= df_categorical.shape[1]
+     similarity = kwargs.get("similarity", len(df_.columns) - 1)
+     assert similarity <= df_.shape[1]

-     adj_dict = {}
-     for index, row in df_categorical.iterrows():
-         adj_dict[index] = row.to_list()
+     adj_dict = {index: row.tolist() for index, row in df_.iterrows()}

-     adjacency_matrix = np.zeros((len(df_categorical), len(df_categorical)))
+     adjacency_matrix = np.zeros((len(df_), len(df_)))

-     for i in range(len(df_categorical)):
-         for j in range(len(df_categorical)):
+     for i in range(len(df_)):
+         for j in range(len(df_)):
              if compare_similarity(adj_dict[i], adj_dict[j]) >= similarity:
                  adjacency_matrix[i][j] = 1
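
A small illustrative run of the simplified function (hypothetical data; with two columns the default `similarity` threshold is `len(columns) - 1 = 1`):

    import pandas as pd

    from likelihood.graph.nn import cal_adjacency_matrix

    df = pd.DataFrame({"a": [1.0, 1.02, 3.0], "b": ["x", "x", "y"]})
    adj_dict, adjacency = cal_adjacency_matrix(df)
    # Rows 0 and 1 agree on both columns (1.0 vs 1.02 sits inside the 5% ratio band),
    # so they are linked; row 2 only matches itself:
    # [[1. 1. 0.]
    #  [1. 1. 0.]
    #  [0. 0. 1.]]
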
@@ -131,8 +111,10 @@ class Data:
          df: DataFrame,
          target: str | None = None,
          exclude_subset: List[str] = [],
+         **kwargs,
      ):
-         _, adjacency = cal_adjacency_matrix(df, exclude_subset=exclude_subset, sparse=True)
+         sparse = kwargs.get("sparse", True)
+         _, adjacency = cal_adjacency_matrix(df, exclude_subset=exclude_subset, sparse=sparse)
          if target is not None:
              X = df.drop(columns=[target] + exclude_subset)
          else:
@@ -147,16 +129,20 @@ class Data:

  @tf.keras.utils.register_keras_serializable(package="Custom", name="VanillaGNNLayer")
  class VanillaGNNLayer(tf.keras.layers.Layer):
-     def __init__(self, dim_in, dim_out, kernel_initializer="glorot_uniform", **kwargs):
+     def __init__(self, dim_in, dim_out, rank=None, kernel_initializer="glorot_uniform", **kwargs):
          super(VanillaGNNLayer, self).__init__(**kwargs)
          self.dim_out = dim_out
+         self.rank = rank
          self.kernel_initializer = kernel_initializer
          self.linear = None

      def build(self, input_shape):
-         self.linear = tf.keras.layers.Dense(
-             self.dim_out, use_bias=False, kernel_initializer=self.kernel_initializer
-         )
+         if self.rank:
+             self.linear = LoRALayer(self.dim_out, rank=self.rank)
+         else:
+             self.linear = tf.keras.layers.Dense(
+                 self.dim_out, use_bias=False, kernel_initializer=self.kernel_initializer
+             )
          super(VanillaGNNLayer, self).build(input_shape)

      def call(self, x, adjacency):
@@ -169,8 +155,11 @@ class VanillaGNNLayer(tf.keras.layers.Layer):
          config.update(
              {
                  "dim_out": self.dim_out,
-                 "kernel_initializer": tf.keras.initializers.serialize(
-                     self.linear.kernel_initializer
+                 "rank": self.rank,
+                 "kernel_initializer": (
+                     None
+                     if self.rank
+                     else tf.keras.initializers.serialize(self.linear.kernel_initializer)
                  ),
              }
          )
@@ -179,14 +168,16 @@ class VanillaGNNLayer(tf.keras.layers.Layer):

  @tf.keras.utils.register_keras_serializable(package="Custom", name="VanillaGNN")
  class VanillaGNN(tf.keras.Model):
-     def __init__(self, dim_in, dim_h, dim_out, **kwargs):
+     def __init__(self, dim_in, dim_h, dim_out, rank=2, **kwargs):
          super(VanillaGNN, self).__init__(**kwargs)
          self.dim_in = dim_in
          self.dim_h = dim_h
          self.dim_out = dim_out
-         self.gnn1 = VanillaGNNLayer(self.dim_in, self.dim_h)
-         self.gnn2 = VanillaGNNLayer(self.dim_h, self.dim_h)
-         self.gnn3 = VanillaGNNLayer(self.dim_h, self.dim_out)
+         self.rank = rank
+
+         self.gnn1 = VanillaGNNLayer(self.dim_in, self.dim_h, self.rank)
+         self.gnn2 = VanillaGNNLayer(self.dim_h, self.dim_h, self.rank)
+         self.gnn3 = VanillaGNNLayer(self.dim_h, self.dim_out, None)

      def call(self, x, adjacency):
          h = self.gnn1(x, adjacency)
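
The effect of the new `rank` argument, in short: the two hidden layers route their projection through a `LoRALayer`, while the output layer keeps a plain `Dense`. A hypothetical construction:

    layer_lora = VanillaGNNLayer(dim_in=16, dim_out=8, rank=2)  # low-rank projection
    layer_dense = VanillaGNNLayer(dim_in=16, dim_out=8)         # rank=None falls back to Dense
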
@@ -208,13 +199,13 @@ class VanillaGNN(tf.keras.Model):
          out = self(x, adjacency)
          loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=out)
          loss = tf.reduce_mean(loss)
-         f1 = self.compute_f1_score(out, y)
+         f1 = round(self.compute_f1_score(out, y), 4)
          return loss.numpy(), f1

      def test(self, data):
          out = self(data.x, data.adjacency)
          test_f1 = self.compute_f1_score(out, data.y)
-         return test_f1
+         return round(test_f1, 4)

      def predict(self, data):
          out = self(data.x, data.adjacency)
@@ -225,6 +216,7 @@ class VanillaGNN(tf.keras.Model):
              "dim_in": self.dim_in,
              "dim_h": self.dim_h,
              "dim_out": self.dim_out,
+             "rank": self.rank,
          }
          base_config = super(VanillaGNN, self).get_config()
          return dict(list(base_config.items()) + list(config.items()))
@@ -235,6 +227,7 @@ class VanillaGNN(tf.keras.Model):
              dim_in=config["dim_in"],
              dim_h=config["dim_h"],
              dim_out=config["dim_out"],
+             rank=config["rank"],
          )

      @tf.function
@@ -248,10 +241,6 @@ class VanillaGNN(tf.keras.Model):
          return loss

      def fit(self, data, epochs, batch_size, test_size=0.2, optimizer="adam"):
-         warnings.warn(
-             "It is normal for validation metrics to underperform. Use the test method to validate after training.",
-             UserWarning,
-         )
          optimizers = {
              "sgd": tf.keras.optimizers.SGD(),
              "adam": tf.keras.optimizers.Adam(),
@@ -290,56 +279,20 @@ class VanillaGNN(tf.keras.Model):
              train_f1_scores.append(train_f1)

              if epoch % 5 == 0:
+                 clear_output(wait=True)
+                 warnings.warn(
+                     "It is normal for validation metrics to underperform during training. Use the test method to validate after training.",
+                     UserWarning,
+                 )
                  val_loss, val_f1 = self.evaluate(X_test, adjacency_test, y_test)
                  val_losses.append(val_loss)
                  val_f1_scores.append(val_f1)
-                 clear_output(wait=True)
                  print(
-                     f"Epoch {epoch:>3} | Train Loss: {train_loss:.3f} | Train F1: {train_f1:.3f} | Val Loss: {val_loss:.3f} | Val F1: {val_f1:.3f}"
+                     f"Epoch {epoch:>3} | Train Loss: {train_loss:.4f} | Train F1: {train_f1:.4f} | Val Loss: {val_loss:.4f} | Val F1: {val_f1:.4f}"
                  )

          return train_losses, train_f1_scores, val_losses, val_f1_scores


  if __name__ == "__main__":
-     # Example usage
-     import pandas as pd
-     from sklearn.datasets import load_iris
-
-     # Load the dataset
-     iris = load_iris()
-
-     # Convert to a DataFrame for easy exploration
-     iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
-     iris_df["species"] = iris.target
-
-     iris_df["sepal length (cm)"] = iris_df["sepal length (cm)"].astype("category")
-     iris_df["sepal width (cm)"] = iris_df["sepal width (cm)"].astype("category")
-     iris_df["petal length (cm)"] = iris_df["petal length (cm)"].astype("category")
-     iris_df["petal width (cm)"] = iris_df["petal width (cm)"].astype("category")
-
-     # Display the first few rows of the dataset
-     print(iris_df.head())
-
-     iris_df = iris_df.sample(frac=1, replace=False).reset_index(drop=True)
-
-     data = Data(iris_df, "species")
-
-     model = VanillaGNN(dim_in=data.x.shape[1], dim_h=8, dim_out=len(iris_df["species"].unique()))
-     print("Before training F1:", model.test(data))
-     model.fit(data, epochs=200, batch_size=32, test_size=0.5)
-     model.save("./best_model", save_format="tf")
-     print("After training F1:", model.test(data))
-     best_model = tf.keras.models.load_model("./best_model")
-
-     print("After loading F1:", best_model.test(data))
-     df_results = pd.DataFrame()
-
-     # Suppose we have a new dataset without the target variable
-     iris_df = iris_df.drop(columns=["species"])
-     data_new = Data(iris_df)
-     print("Predictions:", best_model.predict(data_new))
-     df_results["predicted"] = list(model.predict(data))
-     df_results["actual"] = list(data.y)
-     # df_results.to_csv("results.csv", index=False)
-     breakpoint()
+     print("Examples will be running below")
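
With the inline iris demo gone from `__main__`, an equivalent usage sketch of the updated API (adapted from the removed code; the `rank` argument is the only new piece):

    import pandas as pd
    from sklearn.datasets import load_iris

    from likelihood.graph.nn import Data, VanillaGNN

    iris = load_iris()
    df = pd.DataFrame(iris.data, columns=iris.feature_names)
    df["species"] = iris.target

    data = Data(df, "species")
    model = VanillaGNN(dim_in=data.x.shape[1], dim_h=8, dim_out=3, rank=2)
    model.fit(data, epochs=200, batch_size=32, test_size=0.5)
    print("After training F1:", model.test(data))
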
likelihood/models/deep/autoencoders.py CHANGED
@@ -24,7 +24,7 @@ from sklearn.manifold import TSNE
  from tensorflow.keras.layers import InputLayer
  from tensorflow.keras.regularizers import l2

- from likelihood.tools import OneHotEncoder
+ from likelihood.tools import LoRALayer, OneHotEncoder

  tf.get_logger().setLevel("ERROR")

@@ -39,53 +39,231 @@ def suppress_warnings(func):
      return wrapper


+ class EarlyStopping:
+     def __init__(self, patience=10, min_delta=0.001):
+         self.patience = patience
+         self.min_delta = min_delta
+         self.best_loss = np.inf
+         self.counter = 0
+         self.stop_training = False
+
+     def __call__(self, current_loss):
+         if self.best_loss - current_loss > self.min_delta:
+             self.best_loss = current_loss
+             self.counter = 0
+         else:
+             self.counter += 1
+
+         if self.counter >= self.patience:
+             self.stop_training = True
+
+
+ def mse_loss(y_true, y_pred):
+     """
+     Mean squared error loss function.
+
+     Parameters
+     ----------
+     y_true : `tf.Tensor`
+         The true values.
+     y_pred : `tf.Tensor`
+         The predicted values.
+
+     Returns
+     -------
+     `tf.Tensor`
+     """
+     return tf.reduce_mean(tf.square(y_true - y_pred))
+
+
+ def kl_loss(mean, log_var):
+     """
+     Kullback-Leibler divergence loss function.
+
+     Parameters
+     ----------
+     mean : `tf.Tensor`
+         The mean of the distribution.
+     log_var : `tf.Tensor`
+         The log variance of the distribution.
+
+     Returns
+     -------
+     `tf.Tensor`
+     """
+     return -0.5 * tf.reduce_mean(1 + log_var - tf.square(mean) - tf.exp(log_var))
+
+
+ def vae_loss(y_true, y_pred, mean, log_var):
+     """
+     Variational autoencoder loss function.
+
+     Parameters
+     ----------
+     y_true : `tf.Tensor`
+         The true values.
+     y_pred : `tf.Tensor`
+         The predicted values.
+     mean : `tf.Tensor`
+         The mean of the distribution.
+     log_var : `tf.Tensor`
+         The log variance of the distribution.
+
+     Returns
+     -------
+     `tf.Tensor`
+     """
+     return mse_loss(y_true, y_pred) + kl_loss(mean, log_var)
+
+
+ def sampling(mean, log_var, epsilon_value=1e-8):
+     """
+     Samples from the distribution.
+
+     Parameters
+     ----------
+     mean : `tf.Tensor`
+         The mean of the distribution.
+     log_var : `tf.Tensor`
+         The log variance of the distribution.
+     epsilon_value : float
+         A small value to avoid numerical instability.
+
+     Returns
+     -------
+     `tf.Tensor`
+     """
+     epsilon = tf.random.normal(shape=tf.shape(mean), mean=0.0, stddev=1.0)
+     stddev = tf.exp(0.5 * log_var) + epsilon_value
+     epsilon = tf.random.normal(shape=tf.shape(mean), mean=0.0, stddev=1.0)
+     return mean + stddev * epsilon
+
+
+ def check_for_nans(tensors, name="Tensor"):
+     for t in tensors:
+         if tf.reduce_any(tf.math.is_nan(t)) or tf.reduce_any(tf.math.is_inf(t)):
+             print(f"Warning: {name} contains NaNs or Infs")
+             return True
+     return False
+
+
+ def cal_loss_step(batch, encoder, decoder, vae_mode=False, training=True):
+     """
+     Calculates the loss value on a batch of data.
+
+     Parameters
+     ----------
+     batch : `tf.Tensor`
+         The batch of data.
+     encoder : `tf.keras.Model`
+         The encoder model.
+     decoder : `tf.keras.Model`
+         The decoder model.
+     optimizer : `tf.keras.optimizers.Optimizer`
+         The optimizer to use.
+     vae_mode : `bool`
+         Whether to use variational autoencoder mode. Default is False.
+     training : `bool`
+         Whether the model is in training mode. Default is True.
+
+     Returns
+     -------
+     `tf.Tensor`
+         The loss value.
+     """
+     if vae_mode:
+         mean, log_var = encoder(batch, training=training)
+         log_var = tf.clip_by_value(log_var, clip_value_min=1e-8, clip_value_max=tf.float32.max)
+         decoded = decoder(sampling(mean, log_var), training=training)
+         loss = vae_loss(batch, decoded, mean, log_var)
+     else:
+         encoded = encoder(batch, training=training)
+         decoded = decoder(encoded, training=training)
+         loss = mse_loss(batch, decoded)
+
+     return loss
+
+
+ @tf.function
+ def train_step(batch, encoder, decoder, optimizer, vae_mode=False):
+     """
+     Trains the model on a batch of data.
+
+     Parameters
+     ----------
+     mean : `tf.Tensor`
+         The mean of the distribution.
+     log_var : `tf.Tensor`
+         The log variance of the distribution.
+     batch : `tf.Tensor`
+         The batch of data.
+     encoder : `tf.keras.Model`
+         The encoder model.
+     decoder : `tf.keras.Model`
+         The decoder model.
+     optimizer : `tf.keras.optimizers.Optimizer`
+         The optimizer to use.
+     vae_mode : `bool`
+         Whether to use variational autoencoder mode. Default is False.
+
+     Returns
+     -------
+     `tf.Tensor`
+         The loss value.
+     """
+     optimizer.build(encoder.trainable_variables + decoder.trainable_variables)
+
+     with tf.GradientTape() as encoder_tape, tf.GradientTape() as decoder_tape:
+         loss = cal_loss_step(batch, encoder, decoder, vae_mode=vae_mode)
+
+     gradients_of_encoder = encoder_tape.gradient(loss, encoder.trainable_variables)
+     gradients_of_decoder = decoder_tape.gradient(loss, decoder.trainable_variables)
+
+     optimizer.apply_gradients(zip(gradients_of_encoder, encoder.trainable_variables))
+     optimizer.apply_gradients(zip(gradients_of_decoder, decoder.trainable_variables))
+
+     return loss
+
+
  @tf.keras.utils.register_keras_serializable(package="Custom", name="AutoClassifier")
  class AutoClassifier(tf.keras.Model):
      """
      An auto-classifier model that automatically determines the best classification strategy based on the input data.

-     Attributes:
-     - input_shape_parm: The shape of the input data.
-     - num_classes: The number of classes in the dataset.
-     - units: The number of neurons in each hidden layer.
-     - activation: The type of activation function to use for the neural network layers.
-
-     Methods:
-         __init__(self, input_shape_parm, num_classes, units, activation): Initializes an AutoClassifier instance with the given parameters.
-         build(self, input_shape_parm): Builds the model architecture based on input_shape_parm.
-         call(self, x): Defines the forward pass of the model.
-         get_config(self): Returns the configuration of the model.
-         from_config(cls, config): Recreates an instance of AutoClassifier from its configuration.
-     """
-
-     def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
-         """
-         Initializes an AutoClassifier instance with the given parameters.
+     Parameters
+     ----------
+     input_shape_parm : `int`
+         The shape of the input data.
+     num_classes : `int`
+         The number of classes in the dataset.
+     units : `int`
+         The number of neurons in each hidden layer.
+     activation : `str`
+         The type of activation function to use for the neural network layers.

-         Parameters
-         ----------
-         input_shape_parm : `int`
-             The shape of the input data.
-         num_classes : `int`
-             The number of classes in the dataset.
-         units : `int`
-             The number of neurons in each hidden layer.
-         activation : `str`
-             The type of activation function to use for the neural network layers.
+     Keyword Arguments:
+     ----------
+     Additional keyword arguments to pass to the model.

-         Keyword Arguments:
-         ----------
-         Additional keyword arguments to pass to the model.
+     classifier_activation : `str`
+         The activation function to use for the classifier layer. Default is "softmax". If the activation function is not a classification function, the model can be used in regression problems.
+     num_layers : `int`
+         The number of hidden layers in the classifier. Default is 1.
+     dropout : `float`
+         The dropout rate to use in the classifier. Default is None.
+     l2_reg : `float`
+         The L2 regularization parameter. Default is 0.0.
+     vae_mode : `bool`
+         Whether to use variational autoencoder mode. Default is False.
+     vae_units : `int`
+         The number of units in the variational autoencoder. Default is 2.
+     lora_mode : `bool`
+         Whether to use LoRA layers. Default is False.
+     lora_rank : `int`
+         The rank of the LoRA layer. Default is 4.
+     """

-         classifier_activation : `str`
-             The activation function to use for the classifier layer. Default is "softmax". If the activation function is not a classification function, the model can be used in regression problems.
-         num_layers : `int`
-             The number of hidden layers in the classifier. Default is 1.
-         dropout : `float`
-             The dropout rate to use in the classifier. Default is None.
-         l2_reg : `float`
-             The L2 regularization parameter. Default is 0.0.
-         """
+     def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
          super(AutoClassifier, self).__init__()
          self.input_shape_parm = input_shape_parm
          self.num_classes = num_classes
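
To make the new loss helpers concrete, a small sketch with hand-picked tensors (zero mean and unit variance make the KL term vanish):

    import tensorflow as tf

    from likelihood.models.deep.autoencoders import kl_loss, mse_loss, sampling, vae_loss

    y_true = tf.constant([[0.0, 1.0]])
    mean = tf.zeros((1, 2))
    log_var = tf.zeros((1, 2))

    z = sampling(mean, log_var)                   # z ~ N(mean, exp(log_var))
    print(float(mse_loss(y_true, y_true)))        # 0.0 for a perfect reconstruction
    print(float(kl_loss(mean, log_var)))          # 0.0 for a standard normal
    print(float(vae_loss(y_true, y_true, mean, log_var)))  # sum of the two terms
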
@@ -99,9 +277,12 @@ class AutoClassifier(tf.keras.Model):
          self.num_layers = kwargs.get("num_layers", 1)
          self.dropout = kwargs.get("dropout", None)
          self.l2_reg = kwargs.get("l2_reg", 0.0)
+         self.vae_mode = kwargs.get("vae_mode", False)
+         self.vae_units = kwargs.get("vae_units", 2)
+         self.lora_mode = kwargs.get("lora_mode", False)
+         self.lora_rank = kwargs.get("lora_rank", 4)

-     def build(self, input_shape):
-         # Encoder with L2 regularization
+     def build_encoder_decoder(self, input_shape):
          self.encoder = (
              tf.keras.Sequential(
                  [
@@ -121,7 +302,6 @@ class AutoClassifier(tf.keras.Model):
              else self.encoder
          )

-         # Decoder with L2 regularization
          self.decoder = (
              tf.keras.Sequential(
                  [
@@ -141,9 +321,61 @@ class AutoClassifier(tf.keras.Model):
              else self.decoder
          )

+     def build(self, input_shape):
+         if self.vae_mode:
+             inputs = tf.keras.Input(shape=self.input_shape_parm, name="encoder_input")
+             x = tf.keras.layers.Dense(
+                 units=self.units,
+                 kernel_regularizer=l2(self.l2_reg),
+                 kernel_initializer="he_normal",
+             )(inputs)
+             x = tf.keras.layers.BatchNormalization()(x)
+             x = tf.keras.layers.Activation(self.activation)(x)
+             x = tf.keras.layers.Dense(
+                 units=int(self.units / 2),
+                 kernel_regularizer=l2(self.l2_reg),
+                 kernel_initializer="he_normal",
+                 name="encoder_hidden",
+             )(x)
+             x = tf.keras.layers.BatchNormalization()(x)
+             x = tf.keras.layers.Activation(self.activation)(x)
+
+             mean = tf.keras.layers.Dense(2, name="mean")(x)
+             log_var = tf.keras.layers.Dense(2, name="log_var")(x)
+             log_var = tf.keras.layers.Lambda(lambda x: x + 1e-7)(log_var)
+
+             self.encoder = (
+                 tf.keras.Model(inputs, [mean, log_var], name="encoder")
+                 if not self.encoder
+                 else self.encoder
+             )
+             self.decoder = (
+                 tf.keras.Sequential(
+                     [
+                         tf.keras.layers.Dense(
+                             units=self.units,
+                             kernel_regularizer=l2(self.l2_reg),
+                         ),
+                         tf.keras.layers.BatchNormalization(),
+                         tf.keras.layers.Activation(self.activation),
+                         tf.keras.layers.Dense(
+                             units=self.input_shape_parm,
+                             kernel_regularizer=l2(self.l2_reg),
+                         ),
+                         tf.keras.layers.BatchNormalization(),
+                         tf.keras.layers.Activation(self.activation),
+                     ]
+                 )
+                 if not self.decoder
+                 else self.decoder
+             )
+
+         else:
+             self.build_encoder_decoder(input_shape)
+
          # Classifier with L2 regularization
          self.classifier = tf.keras.Sequential()
-         if self.num_layers > 1:
+         if self.num_layers > 1 and not self.lora_mode:
              for _ in range(self.num_layers - 1):
                  self.classifier.add(
                      tf.keras.layers.Dense(
@@ -154,16 +386,106 @@ class AutoClassifier(tf.keras.Model):
                      )
              if self.dropout:
                  self.classifier.add(tf.keras.layers.Dropout(self.dropout))
-         self.classifier.add(
-             tf.keras.layers.Dense(
-                 units=self.num_classes,
-                 activation=self.classifier_activation,
-                 kernel_regularizer=l2(self.l2_reg),
+             self.classifier.add(
+                 tf.keras.layers.Dense(
+                     units=self.num_classes,
+                     activation=self.classifier_activation,
+                     kernel_regularizer=l2(self.l2_reg),
+                 )
              )
-         )
+         elif self.lora_mode:
+             for _ in range(self.num_layers - 1):
+                 self.classifier.add(
+                     LoRALayer(units=self.units, rank=self.lora_rank, name=f"LoRA_{_}")
+                 )
+                 self.classifier.add(tf.keras.layers.Activation(self.activation))
+             if self.dropout:
+                 self.classifier.add(tf.keras.layers.Dropout(self.dropout))
+             self.classifier.add(
+                 tf.keras.layers.Dense(
+                     units=self.num_classes,
+                     activation=self.classifier_activation,
+                     kernel_regularizer=l2(self.l2_reg),
+                 )
+             )
+         else:
+             self.classifier.add(
+                 tf.keras.layers.Dense(
+                     units=self.num_classes,
+                     activation=self.classifier_activation,
+                     kernel_regularizer=l2(self.l2_reg),
+                 )
+             )
+
+     def train_encoder_decoder(
+         self, data, epochs, batch_size, validation_split=0.2, patience=10, **kwargs
+     ):
+         """
+         Trains the encoder and decoder on the input data.
+
+         Parameters
+         ----------
+         data : `tf.data.Dataset`, `np.ndarray`
+             The input data.
+         epochs : `int`
+             The number of epochs to train for.
+         batch_size : `int`
+             The batch size to use.
+         validation_split : `float`
+             The proportion of the dataset to use for validation. Default is 0.2.
+         patience : `int`
+             The number of epochs to wait before early stopping. Default is 10.
+
+         Keyword Arguments:
+         ----------
+         Additional keyword arguments to pass to the model.
+         """
+         verbose = kwargs.get("verbose", True)
+         optimizer = kwargs.get("optimizer", tf.keras.optimizers.Adam())
+         dummy_input = tf.convert_to_tensor(tf.random.normal([1, self.input_shape_parm]))
+         self.build(dummy_input.shape)
+         if not self.vae_mode:
+             dummy_output = self.encoder(dummy_input)
+             self.decoder(dummy_output)
+         else:
+             mean, log_var = self.encoder(dummy_input)
+             dummy_output = sampling(mean, log_var)
+             self.decoder(dummy_output)
+
+         if isinstance(data, np.ndarray):
+             data = tf.data.Dataset.from_tensor_slices(data).batch(batch_size)
+             data = data.map(lambda x: tf.cast(x, tf.float32))
+
+         early_stopping = EarlyStopping(patience=patience)
+         train_batches = data.take(int((1 - validation_split) * len(data)))
+         val_batches = data.skip(int((1 - validation_split) * len(data)))
+         for epoch in range(epochs):
+             for train_batch, val_batch in zip(train_batches, val_batches):
+                 loss_train = train_step(
+                     train_batch, self.encoder, self.decoder, optimizer, self.vae_mode
+                 )
+                 loss_val = cal_loss_step(
+                     val_batch, self.encoder, self.decoder, self.vae_mode, False
+                 )
+
+             early_stopping(loss_train)
+
+             if early_stopping.stop_training:
+                 print(f"Early stopping triggered at epoch {epoch}.")
+                 break
+
+             if epoch % 10 == 0 and verbose:
+                 print(
+                     f"Epoch {epoch}: Train Loss: {loss_train:.6f} Validation Loss: {loss_val:.6f}"
+                 )
+         self.freeze_encoder_decoder()

      def call(self, x):
-         encoded = self.encoder(x)
+         if self.vae_mode:
+             mean, log_var = self.encoder(x)
+             encoded = sampling(mean, log_var)
+         else:
+             encoded = self.encoder(x)
          decoded = self.decoder(encoded)
          combined = tf.concat([decoded, encoded], axis=1)
          classification = self.classifier(combined)
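
A minimal usage sketch of the new modes on hypothetical data (`train_encoder_decoder` pretrains the autoencoder and then freezes it via `freeze_encoder_decoder`):

    import numpy as np

    from likelihood.models.deep.autoencoders import AutoClassifier

    X = np.random.rand(256, 10).astype("float32")

    clf = AutoClassifier(
        input_shape_parm=10,
        num_classes=3,
        units=16,
        activation="relu",
        vae_mode=True,   # variational encoder/decoder
        lora_mode=True,  # LoRA layers in the classifier head
        lora_rank=4,
    )
    clf.train_encoder_decoder(X, epochs=20, batch_size=32)
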
@@ -190,7 +512,7 @@ class AutoClassifier(tf.keras.Model):
      def set_encoder_decoder(self, source_model):
          """
          Sets the encoder and decoder layers from another AutoClassifier instance,
-         ensuring compatibility in dimensions.
+         ensuring compatibility in dimensions. Only works if vae_mode is False.

          Parameters:
          -----------
@@ -257,6 +579,10 @@ class AutoClassifier(tf.keras.Model):
              "num_layers": self.num_layers,
              "dropout": self.dropout,
              "l2_reg": self.l2_reg,
+             "vae_mode": self.vae_mode,
+             "vae_units": self.vae_units,
+             "lora_mode": self.lora_mode,
+             "lora_rank": self.lora_rank,
          }
          base_config = super(AutoClassifier, self).get_config()
          return dict(list(base_config.items()) + list(config.items()))
@@ -272,6 +598,10 @@ class AutoClassifier(tf.keras.Model):
              num_layers=config["num_layers"],
              dropout=config["dropout"],
              l2_reg=config["l2_reg"],
+             vae_mode=config["vae_mode"],
+             vae_units=config["vae_units"],
+             lora_mode=config["lora_mode"],
+             lora_rank=config["lora_rank"],
          )

@@ -302,6 +632,8 @@ def call_existing_code(
          The shape of the input data.
      num_classes : `int`
          The number of classes in the dataset.
+     num_layers : `int`
+         The number of hidden layers in the classifier. Default is 1.

      Returns
      -------
@@ -578,7 +910,10 @@ class GetInsights:
      def __init__(self, model: AutoClassifier, inputs: np.ndarray) -> None:
          self.inputs = inputs
          self.model = model
-         self.encoder_layer = self.model.encoder.layers[0]
+         if isinstance(self.model.encoder.layers[0], InputLayer):
+             self.encoder_layer = self.model.encoder.layers[1]
+         else:
+             self.encoder_layer = self.model.encoder.layers[0]
          self.decoder_layer = self.model.decoder.layers[0]
          self.encoder_weights = self.encoder_layer.get_weights()[0]
          self.decoder_weights = self.decoder_layer.get_weights()[0]
@@ -607,7 +942,12 @@ class GetInsights:
          indexes = np.random.choice(np.arange(inputs.shape[0]), n, replace=False)
          inputs = inputs[indexes]
          inputs[np.isnan(inputs)] = 0.0
-         encoded = self.model.encoder(inputs)
+         # check if self.model.encoder(inputs) has two outputs
+         try:
+             mean, log_var = self.model.encoder(inputs)
+             encoded = sampling(mean, log_var)
+         except:
+             encoded = self.model.encoder(inputs)
          reconstructed = self.model.decoder(encoded)
          combined = tf.concat([reconstructed, encoded], axis=1)
          self.classification = self.model.classifier(combined).numpy().argmax(axis=1)
likelihood/tools/__init__.py CHANGED
@@ -1,2 +1,3 @@
+ from .models_tools import *
  from .numeric_tools import *
  from .tools import *
likelihood/tools/models_tools.py ADDED
@@ -0,0 +1,101 @@
+ import logging
+ import os
+
+ import networkx as nx
+ import pandas as pd
+
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+ logging.getLogger("tensorflow").setLevel(logging.ERROR)
+
+ import tensorflow as tf
+
+
+ @tf.keras.utils.register_keras_serializable(package="Custom", name="LoRALayer")
+ class LoRALayer(tf.keras.layers.Layer):
+     def __init__(self, units, rank=4, **kwargs):
+         super(LoRALayer, self).__init__(**kwargs)
+         self.units = units
+         self.rank = rank
+
+     def build(self, input_shape):
+         input_dim = input_shape[-1]
+         print(f"Input shape: {input_shape}")
+
+         if self.rank > input_dim:
+             raise ValueError(
+                 f"Rank ({self.rank}) cannot be greater than input dimension ({input_dim})."
+             )
+         if self.rank > self.units:
+             raise ValueError(
+                 f"Rank ({self.rank}) cannot be greater than number of units ({self.units})."
+             )
+
+         self.A = self.add_weight(
+             shape=(input_dim, self.rank), initializer="random_normal", trainable=True, name="A"
+         )
+         self.B = self.add_weight(
+             shape=(self.rank, self.units), initializer="random_normal", trainable=True, name="B"
+         )
+         print(f"Dense weights shape: {input_dim}x{self.units}")
+         print(f"LoRA weights shape: A{self.A.shape}, B{self.B.shape}")
+
+     def call(self, inputs):
+         lora_output = tf.matmul(tf.matmul(inputs, self.A), self.B)
+         return lora_output
+
+
+ def apply_lora(model, rank=4):
+     inputs = tf.keras.Input(shape=model.input_shape[1:])
+     x = inputs
+
+     for layer in model.layers:
+         if isinstance(layer, tf.keras.layers.Dense):
+             print(f"Applying LoRA to layer {layer.name}")
+             x = LoRALayer(units=layer.units, rank=rank)(x)
+         else:
+             x = layer(x)
+     new_model = tf.keras.Model(inputs=inputs, outputs=x)
+     return new_model
+
+
+ def graph_metrics(adj_matrix, eigenvector_threshold=1e-6):
+     """
+     This function calculates the following graph metrics using the adjacency matrix:
+     1. Degree Centrality
+     2. Clustering Coefficient
+     3. Eigenvector Centrality
+     4. Degree
+     5. Betweenness Centrality
+     6. Closeness Centrality
+     7. Assortativity
+     """
+     adj_matrix = adj_matrix.astype(int)
+     G = nx.from_numpy_array(adj_matrix)
+     degree_centrality = nx.degree_centrality(G)
+     clustering_coeff = nx.clustering(G)
+     try:
+         eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=500)
+     except nx.PowerIterationFailedConvergence:
+         print("Power iteration failed to converge. Returning NaN for eigenvector centrality.")
+         eigenvector_centrality = {node: float("nan") for node in G.nodes()}
+
+     for node, centrality in eigenvector_centrality.items():
+         if centrality < eigenvector_threshold:
+             eigenvector_centrality[node] = 0.0
+     degree = dict(G.degree())
+     betweenness_centrality = nx.betweenness_centrality(G)
+     closeness_centrality = nx.closeness_centrality(G)
+     assortativity = nx.degree_assortativity_coefficient(G)
+     metrics_df = pd.DataFrame(
+         {
+             "Degree": degree,
+             "Degree Centrality": degree_centrality,
+             "Clustering Coefficient": clustering_coeff,
+             "Eigenvector Centrality": eigenvector_centrality,
+             "Betweenness Centrality": betweenness_centrality,
+             "Closeness Centrality": closeness_centrality,
+         }
+     )
+     metrics_df["Assortativity"] = assortativity
+
+     return metrics_df
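
A short sketch of the new helpers (illustrative values; note that `apply_lora` replaces each Dense layer with a low-rank A @ B projection rather than adding a residual adapter):

    import numpy as np
    import tensorflow as tf

    from likelihood.tools import apply_lora, graph_metrics

    # Retrofit the Dense layers of an existing model with rank-4 projections.
    base = tf.keras.Sequential(
        [tf.keras.layers.InputLayer(input_shape=(16,)), tf.keras.layers.Dense(8)]
    )
    lora_model = apply_lora(base, rank=4)

    # Graph metrics for a 3-node path graph.
    adj = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]])
    print(graph_metrics(adj))
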
likelihood-1.3.2.dist-info/METADATA → likelihood-1.4.1.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: likelihood
- Version: 1.3.2
+ Version: 1.4.1
  Summary: A package that performs the maximum likelihood algorithm.
  Home-page: https://github.com/jzsmoreno/likelihood/
  Author: J. A. Moreno-Guerra
likelihood-1.3.2.dist-info/RECORD → likelihood-1.4.1.dist-info/RECORD RENAMED
@@ -2,19 +2,20 @@ likelihood/__init__.py,sha256=5C0hapdsk85XZhN_rssRAEFpkRRuKNtj6cyRbqD2_gM,994
  likelihood/main.py,sha256=fcCkGOOWKjfvw2tLVqjuKPV8t0rVCIT9FlbYcOv4EYo,7974
  likelihood/graph/__init__.py,sha256=6TuFDfmXTwpLyHl7_KqBfdzW6zqHjGzIFvymjFPlvjI,21
  likelihood/graph/graph.py,sha256=bLrNMvIh7GOTdPTwnNss8oPZ7cbSHQScAsH_ttmVUK0,3294
- likelihood/graph/nn.py,sha256=-OvHAeB3l2nd0ZeAk03cVDGBgaTn-WyGIsj5Rq7XeCY,12237
+ likelihood/graph/nn.py,sha256=MD2M-KgQnrlHg3iS42vrdOnD51-GRk3CJ5CCMQ0DNWI,10763
  likelihood/models/__init__.py,sha256=e6nB4w47w0Q9DrAFeP3OcUgcoHOtf7Il4mBhgf4AARg,52
  likelihood/models/hmm.py,sha256=0s0gFySH1u4NjRaZDxiZ8oeTaFhFrw1x0GJxwy3dFrA,6253
  likelihood/models/regression.py,sha256=9cakyGlJCEO6WfpoKLh3GxdXQeQp7cUvJIkQ5odT0TA,9404
  likelihood/models/simulation.py,sha256=LFyE_szo7sDukviMLeg_6RoyAaI7yMXUy8f4mDOrGoc,8460
  likelihood/models/utils.py,sha256=dvigPi_hxcs5ntfHr7Y1JvP5ULtMW3kkN0nJpS4orE8,1319
  likelihood/models/deep/__init__.py,sha256=-KIPippVaMqgG8mEgYjNxYQdqOUcFhUuKhbVe8TTCfo,28
- likelihood/models/deep/autoencoders.py,sha256=BSAnopJYJ_lYRcRYT5ZoUVjfrAPlsjdAOjNb6mUD6Ds,28198
- likelihood/tools/__init__.py,sha256=MCjsCWfBNKE2uMN0VizDN1uFzZ_md0X2WZeBdWhrCR8,50
+ likelihood/models/deep/autoencoders.py,sha256=O-H5KLmJvYjuE-b6l97esruihK6djocgxbkO2N1X2RM,39306
+ likelihood/tools/__init__.py,sha256=N1IhMDzacsGQT2MIYBMBC0zTxes78vC_0gGrwkuPgmg,78
+ likelihood/tools/models_tools.py,sha256=bjwoBlDeW1fUi58yJsuKcaTUTgWhOCNsc24_ESYI3BI,3502
  likelihood/tools/numeric_tools.py,sha256=FA44kbiAcxcquz1el_g3Pqsp5ii8XFkAIrsMs5bGkj0,11445
  likelihood/tools/tools.py,sha256=6JLZBHxc4f1lJfw4aBwdS2s16EpydFNqLZF73I7wddQ,44412
- likelihood-1.3.2.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
- likelihood-1.3.2.dist-info/METADATA,sha256=x-4GMzzwrsMNQocGRo57TUlUUSY2tBppmOzeRPaapIc,2822
- likelihood-1.3.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- likelihood-1.3.2.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
- likelihood-1.3.2.dist-info/RECORD,,
+ likelihood-1.4.1.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
+ likelihood-1.4.1.dist-info/METADATA,sha256=6otKXhthH5ZSUvYfcghD6CaC1skWZ0FBouXsGXuJfZw,2822
+ likelihood-1.4.1.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+ likelihood-1.4.1.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
+ likelihood-1.4.1.dist-info/RECORD,,
likelihood-1.3.2.dist-info/WHEEL → likelihood-1.4.1.dist-info/WHEEL RENAMED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.8.0)
+ Generator: setuptools (75.8.2)
  Root-Is-Purelib: true
  Tag: py3-none-any