likelihood 1.5.7__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- likelihood/graph/__init__.py +8 -0
- likelihood/graph/_nn.py +421 -0
- likelihood/models/deep/__init__.py +11 -2
- likelihood/models/deep/_autoencoders.py +895 -0
- likelihood/models/deep/_predictor.py +810 -0
- likelihood/models/deep/autoencoders.py +52 -29
- likelihood/models/deep/gan.py +7 -7
- likelihood/models/deep/predictor.py +10 -8
- likelihood/models/deep/rl.py +350 -0
- likelihood/models/simulation.py +9 -4
- likelihood/tools/cat_embed.py +213 -0
- likelihood/tools/tools.py +7 -2
- {likelihood-1.5.7.dist-info → likelihood-2.0.0.dist-info}/METADATA +4 -3
- likelihood-2.0.0.dist-info/RECORD +30 -0
- likelihood-1.5.7.dist-info/RECORD +0 -25
- {likelihood-1.5.7.dist-info → likelihood-2.0.0.dist-info}/WHEEL +0 -0
- {likelihood-1.5.7.dist-info → likelihood-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {likelihood-1.5.7.dist-info → likelihood-2.0.0.dist-info}/top_level.txt +0 -0
likelihood/models/deep/autoencoders.py
CHANGED
@@ -277,7 +277,8 @@ class AutoClassifier(tf.keras.Model):
                     activation=self.activation,
                     kernel_regularizer=l2(self.l2_reg),
                 ),
-            ]
+            ],
+            name="encoder",
         )
         if not self.encoder
         else self.encoder
@@ -296,7 +297,8 @@ class AutoClassifier(tf.keras.Model):
                     activation=self.activation,
                     kernel_regularizer=l2(self.l2_reg),
                 ),
-            ]
+            ],
+            name="decoder",
         )
         if not self.decoder
         else self.decoder
@@ -326,7 +328,7 @@ class AutoClassifier(tf.keras.Model):
         log_var = tf.keras.layers.Lambda(lambda x: x + 1e-7)(log_var)
 
         self.encoder = (
-            tf.keras.Model(inputs, [mean, log_var], name="
+            tf.keras.Model(inputs, [mean, log_var], name="vae_encoder")
             if not self.encoder
             else self.encoder
         )
@@ -345,7 +347,8 @@ class AutoClassifier(tf.keras.Model):
                 ),
                 tf.keras.layers.BatchNormalization(),
                 tf.keras.layers.Activation(self.activation),
-            ]
+            ],
+            name="vae_decoder",
         )
         if not self.decoder
         else self.decoder
@@ -366,13 +369,7 @@ class AutoClassifier(tf.keras.Model):
             )
             if self.dropout:
                 self.classifier.add(tf.keras.layers.Dropout(self.dropout))
-
-                tf.keras.layers.Dense(
-                    units=self.num_classes,
-                    activation=self.classifier_activation,
-                    kernel_regularizer=l2(self.l2_reg),
-                )
-            )
+
         elif self.lora_mode:
             for _ in range(self.num_layers - 1):
                 self.classifier.add(
@@ -381,21 +378,14 @@ class AutoClassifier(tf.keras.Model):
                 self.classifier.add(tf.keras.layers.Activation(self.activation))
             if self.dropout:
                 self.classifier.add(tf.keras.layers.Dropout(self.dropout))
-
-
-
-
-
-                )
-            )
-        else:
-            self.classifier.add(
-                tf.keras.layers.Dense(
-                    units=self.num_classes,
-                    activation=self.classifier_activation,
-                    kernel_regularizer=l2(self.l2_reg),
-                )
+
+        self.classifier.add(
+            tf.keras.layers.Dense(
+                units=self.num_classes,
+                activation=self.classifier_activation,
+                kernel_regularizer=l2(self.l2_reg),
             )
+        )
 
     def train_encoder_decoder(
         self, data, epochs, batch_size, validation_split=0.2, patience=10, **kwargs
@@ -494,12 +484,12 @@ class AutoClassifier(tf.keras.Model):
         Sets the encoder and decoder layers from another AutoClassifier instance,
         ensuring compatibility in dimensions. Only works if vae_mode is False.
 
-        Parameters
+        Parameters
         -----------
         source_model : AutoClassifier
             The source model to copy the encoder and decoder layers from.
 
-        Raises
+        Raises
         -------
         ValueError
             If the input shape or units of the source model do not match.
@@ -610,6 +600,13 @@ def call_existing_code(
     num_layers : `int`
         The number of hidden layers in the classifier. Default is 1.
 
+    Keyword Arguments:
+    ----------
+    vae_mode : `bool`
+        Whether to use variational autoencoder mode. Default is False.
+    vae_units : `int`
+        The number of units in the variational autoencoder. Default is 2.
+
     Returns
     -------
     `AutoClassifier`
@@ -617,6 +614,8 @@ def call_existing_code(
     """
     dropout = kwargs.get("dropout", None)
     l2_reg = kwargs.get("l2_reg", 0.0)
+    vae_mode = kwargs.get("vae_mode", False)
+    vae_units = kwargs.get("vae_units", 2)
     model = AutoClassifier(
         input_shape_parm=input_shape_parm,
         num_classes=num_classes,
@@ -625,6 +624,8 @@ def call_existing_code(
         num_layers=num_layers,
         dropout=dropout,
         l2_reg=l2_reg,
+        vae_mode=vae_mode,
+        vae_units=vae_units,
     )
     model.compile(
         optimizer=optimizer,
@@ -731,6 +732,24 @@ def build_model(
             else hyperparameters["l2_reg"]
         )
     )
+    vae_mode = (
+        hp.Choice("vae_mode", [True, False])
+        if "vae_mode" not in hyperparameters_keys
+        else hyperparameters["vae_mode"]
+    )
+
+    try:
+        vae_units = (
+            hp.Int("vae_units", min_value=2, max_value=10, step=1)
+            if ("vae_units" not in hyperparameters_keys) and vae_mode
+            else (
+                hp.Choice("vae_units", hyperparameters["vae_units"])
+                if isinstance(hyperparameters["vae_units"], list)
+                else hyperparameters["vae_units"]
+            )
+        )
+    except KeyError:
+        vae_units = None
 
     model = call_existing_code(
         units=units,
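The block added above registers vae_mode and vae_units with the tuner's search space only when the caller has not pinned them in hyperparameters. A rough standalone sketch of that pattern follows; the keras_tuner import, the user_hyperparameters dict, and the printed values are illustrative assumptions, not part of the package API.

import keras_tuner as kt

hp = kt.HyperParameters()
user_hyperparameters = {"vae_mode": True}  # hypothetical caller-supplied overrides

# Use the caller's value when it is pinned; otherwise register the parameter
# with the tuner so it becomes part of the search space.
vae_mode = (
    hp.Choice("vae_mode", [True, False])
    if "vae_mode" not in user_hyperparameters
    else user_hyperparameters["vae_mode"]
)
vae_units = hp.Int("vae_units", min_value=2, max_value=10, step=1) if vae_mode else None
print(vae_mode, vae_units)  # here: True and the default of the registered range (2)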
@@ -742,6 +761,8 @@ def build_model(
         num_layers=num_layers,
         dropout=dropout,
         l2_reg=l2_reg,
+        vae_mode=vae_mode,
+        vae_units=vae_units,
     )
     return model
 
@@ -876,6 +897,8 @@ def setup_model(
         tuner.results_summary()
     else:
         best_model = tf.keras.models.load_model(filepath)
-
     best_hps = tuner.get_best_hyperparameters(1)[0].values
-
+    vae_mode = best_hps.get("vae_mode", hyperparameters.get("vae_mode", False))
+    best_hps["vae_units"] = None if not vae_mode else best_hps["vae_units"]
+
+    return best_model, pd.DataFrame(best_hps, index=["Value"]).dropna(axis=1)
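The new return statement packs the best hyperparameters into a one-row DataFrame and drops any column left as None (for example vae_units when vae_mode is False). A small pandas-only sketch of that behaviour, with made-up values:

import pandas as pd

best_hps = {"units": 64, "num_layers": 2, "vae_mode": False, "vae_units": None}  # illustrative values
summary = pd.DataFrame(best_hps, index=["Value"]).dropna(axis=1)
print(summary)  # the vae_units column disappears because its only value is missing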
likelihood/models/deep/gan.py
CHANGED
@@ -41,7 +41,7 @@ class GANRegressor(tf.keras.Model):
         self.build(dummy_input.shape)
 
     def build(self, input_shape):
-        self.gan = tf.keras.models.Sequential([self.generator, self.discriminator])
+        self.gan = tf.keras.models.Sequential([self.generator, self.discriminator], name="gan")
 
         self.generator.compile(
             optimizer=self.optimizer,
@@ -57,7 +57,7 @@ class GANRegressor(tf.keras.Model):
         super(GANRegressor, self).build(input_shape)
 
     def _build_generator(self):
-        generator = tf.keras.Sequential()
+        generator = tf.keras.Sequential(name="generator")
         generator.add(
             tf.keras.layers.Dense(
                 self.num_neurons,
@@ -78,7 +78,7 @@ class GANRegressor(tf.keras.Model):
         return generator
 
     def _build_discriminator(self):
-        discriminator = tf.keras.Sequential()
+        discriminator = tf.keras.Sequential(name="discriminator")
         for _ in range(self.depth):
             discriminator.add(
                 tf.keras.layers.Dense(
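The three gan.py hunks above give the combined model and its sub-models explicit names. A short sketch (toy layer sizes, not the package's defaults) of why named Sequential sub-models are convenient:

import tensorflow as tf

generator = tf.keras.Sequential([tf.keras.layers.Dense(8)], name="generator")
discriminator = tf.keras.Sequential([tf.keras.layers.Dense(1)], name="discriminator")
gan = tf.keras.Sequential([generator, discriminator], name="gan")
gan.build((None, 4))

gan.summary()                      # sub-models are listed as "generator" and "discriminator"
print(gan.get_layer("generator"))  # named sub-models can be looked up directly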
@@ -102,7 +102,7 @@ class GANRegressor(tf.keras.Model):
         Train the GAN model.
 
         Parameters
-
+        ----------
         X : array-like
             Input data.
         y : array-like
@@ -117,7 +117,7 @@ class GANRegressor(tf.keras.Model):
             Verbosity level. Default is 1.
 
         Returns
-
+        -------
         history : pd.DataFrame
             Training history.
         """
@@ -234,7 +234,7 @@ class GANRegressor(tf.keras.Model):
         Train the generator model.
 
         Parameters
-
+        ----------
         X_train : array-like
             Training data.
         y_train : array-like
@@ -249,7 +249,7 @@ class GANRegressor(tf.keras.Model):
             Number of epochs to wait before early stopping. Default is 3.
 
         Returns
-
+        -------
         history : pd.DataFrame
             Training history.
         """
likelihood/models/deep/predictor.py
CHANGED
@@ -109,15 +109,16 @@ class GetInsights:
                 "in the model's transformation.</p>"
             )
         )
-        self.
-
-
-
-
-
-
+        if not self.model.encoder.name.startswith("vae"):
+            self.viz_encoder_decoder_graphs(threshold_factor=threshold_factor, top_k=top_k)
+
+        display(HTML("<h2 style='margin-top:30px;'>🧠 Classifier Layer Graphs</h2>"))
+        display(
+            HTML(
+                "<p>This visualization shows how features propagate through each dense layer in the classifier. "
+                "Only the strongest weighted connections are shown to highlight influential paths through the network.</p>"
+            )
         )
-        )
         self.viz_classifier_graphs(threshold_factor=threshold_factor, top_k=top_k)
 
         display(HTML("<h2 style='margin-top:30px;'>📈 Statistical Summary</h2>"))
@@ -673,6 +674,7 @@ class GetInsights:
             / (data_normalized.iloc[:, :-1].max() - data_normalized.iloc[:, :-1].min())
             - 1
         )
+        data_normalized.dropna(axis=1, inplace=True)
         radviz(data_normalized, color_column, color=self.colors)
         plt.title(title)
         plt.show()
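The added dropna(axis=1) guards the radviz call: the min-max style rescaling shown in the hunk turns any constant column into all-NaN (max equals min), and dropping those columns keeps the plot from failing. A standalone pandas sketch of that effect, with toy data:

import pandas as pd

df = pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [5.0, 5.0, 5.0]})  # "b" is constant
scaled = 2 * (df - df.min()) / (df.max() - df.min()) - 1
print(scaled)                 # column "b" becomes all NaN because max == min
print(scaled.dropna(axis=1))  # only column "a" survives, as in the patched call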
likelihood/models/deep/rl.py
ADDED
@@ -0,0 +1,350 @@
+import random
+from collections import deque
+
+import numpy as np
+import tensorflow as tf
+from packaging import version
+
+if version.parse(tf.__version__) > version.parse("2.15.0"):
+    from ._autoencoders import AutoClassifier
+else:
+    from .autoencoders import AutoClassifier
+
+
+def print_progress_bar(iteration, total, length=30):
+    percent = f"{100 * (iteration / float(total)):.1f}"
+    filled_length = int(length * iteration // total)
+    bar = "█" * filled_length + "-" * (length - filled_length)
+    print(f"\rProgress: |{bar}| {percent}% Complete", end="\r")
+    if iteration == total:
+        print()
+
+
+class Env:
+    def __init__(self, model, maxlen=100, name="likenasium"):
+        """
+        Initialize the environment with a model.
+
+        Parameters
+        ----------
+        model : Any
+            Model with `.predict()` method (e.g., Keras model).
+        maxlen : int
+            Maximum length of deque. By default it is set to `100`.
+        name : str
+            The name of the environment. By default it is set to `likenasium`.
+        """
+        self.model = model
+        self.maxlen = maxlen
+        self.transitions = deque(
+            maxlen=self.maxlen
+        )  # Stores (state, action, reward, next_action, done)
+        self.current_state = None
+        self.current_step = 0
+        self.done = False
+
+    def step(self, state, action, verbose=0):
+        """
+        Perform an environment step with the given action.
+
+        Parameters
+        ----------
+        state : `np.ndarray`
+            Current state to process (input to the model).
+        action : int
+            Expected action to process.
+
+        Returns
+        -------
+        tuple: (current_state, action_pred, reward, next_action, done)
+        """
+        if self.done:
+            return None, None, 0, None, True
+
+        # Process action through model
+        model_output = self.model.predict(state.reshape((1, -1)), verbose=verbose)
+        action_pred = np.argmax(model_output, axis=1)[0]
+        model_output[:, action_pred] = 0.0
+        next_action = np.max(model_output, axis=1)[0]  # Second most probable action
+
+        # Calculate reward (1 if correct prediction, 0 otherwise)
+        reward = 1 if action_pred == action else 0
+
+        # Update current state
+        self.current_state = state
+        self.current_step += 1
+
+        # Add transition to history
+        if self.current_step <= self.maxlen:
+            self.transitions.append(
+                (
+                    self.current_state,  # Previous state
+                    action_pred,  # Current action
+                    reward,  # Reward
+                    next_action,  # Next action
+                    self.done,  # Done flag
+                )
+            )
+        return self.current_state, action_pred, reward, next_action, self.done
+
+    def reset(self):
+        """Reset the environment to initial state."""
+        self.current_state = None
+        self.current_step = 0
+        self.done = False
+        self.transitions = deque(maxlen=self.maxlen)
+        return self.current_state
+
+    def get_transitions(self):
+        """Get all stored transitions."""
+        return self.transitions
+
+
+class AutoQL:
+    """
+    AutoQL: A reinforcement learning agent using Q-learning with Epsilon-greedy policy.
+
+    This class implements a Q-learning agent with:
+    - Epsilon-greedy policy for exploration
+    - Replay buffer for experience replay
+    - Automatic model version handling for TensorFlow
+    """
+
+    def __init__(
+        self,
+        env,
+        model,
+        maxlen=2000,
+    ):
+        """Initialize AutoQL agent
+
+        Parameters
+        ----------
+        env : Any
+            The environment to interact with
+        model : tf.keras.Model
+            The Q-network model
+        """
+
+        self.env = env
+        self.model = model
+        self.maxlen = maxlen
+        self.replay_buffer = deque(maxlen=self.maxlen)
+
+    def epsilon_greedy_policy(self, state, action, epsilon=0):
+        """
+        Epsilon-greedy policy for action selection
+
+        Parameters
+        ----------
+        state : `np.ndarray`
+            Current state.
+        action : int
+            Expected action to process.
+        epsilon : float
+            Exploration probability. By default it is set to `0`
+
+        Returns
+        -------
+        tuple: (state, action, reward, next_action, done)
+        """
+        current_state, value, reward, next_action, done = self.env.step(state, action)
+
+        if np.random.rand() > epsilon:
+            state = np.asarray(state).astype(np.float32)
+            return current_state, value, reward, next_action, done
+        step_ = random.sample(self.env.get_transitions(), 1)
+        _state, greedy_action, _reward, _next_action, _done = zip(*step_)
+
+        return _state[0], greedy_action[0], _reward[0], _next_action[0], _done[0]
+
+    def play_one_step(self, state, action, epsilon):
+        """
+        Perform one step in the environment and add experience to buffer
+
+        Parameters
+        ----------
+        state : `np.ndarray`
+            Current state
+        action : int
+            Expected action to process.
+
+        epsilon : float
+            Exploration probability.
+
+        Returns
+        -------
+        tuple: (state, action, reward, next_action, done)
+        """
+        current_state, greedy_action, reward, next_action, done = self.epsilon_greedy_policy(
+            state, action, epsilon
+        )
+
+        done = 1 if done else 0
+
+        # Add experience to replay buffer
+        self.replay_buffer.append(
+            (
+                current_state,  # Previous state
+                greedy_action,  # Current action
+                reward,  # Reward
+                next_action,  # Next action
+                done,  # Done flag
+            )
+        )
+
+        return current_state, greedy_action, reward, next_action, done
+
+    @tf.function
+    def _training_step(self):
+        """
+        Perform one training step using experience replay
+
+        Returns
+        -------
+        float: Training loss
+        """
+
+        batch_ = random.sample(self.replay_buffer, self.batch_size)
+        states, actions, rewards, next_actions, dones = zip(*batch_)
+        states = np.array(states).reshape(self.batch_size, -1)
+        actions = np.array(actions).reshape(
+            self.batch_size,
+        )
+        rewards = np.array(rewards).reshape(
+            self.batch_size,
+        )
+        max_next_Q_values = np.array(next_actions).reshape(self.batch_size, -1)
+        dones = np.array(dones).reshape(
+            self.batch_size,
+        )
+        target_Q_values = rewards + (1 - dones) * self.gamma * max_next_Q_values
+
+        actions = tf.convert_to_tensor(actions, dtype=tf.int32)
+        states = tf.convert_to_tensor(states, dtype=tf.float32)
+        target_Q_values = tf.convert_to_tensor(target_Q_values, dtype=tf.float32)
+
+        with tf.GradientTape() as tape:
+            all_Q_values = self.model(states)
+            indices = tf.stack([tf.range(tf.shape(actions)[0]), actions], axis=1)
+            Q_values = tf.gather_nd(all_Q_values, indices)
+            loss = tf.reduce_mean(self.loss_fn(target_Q_values, Q_values))
+        grads = tape.gradient(loss, self.model.trainable_variables)
+        self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
+        return loss
+
+    def train(
+        self,
+        x_data,
+        y_data,
+        optimizer="adam",
+        loss_fn="mse",
+        num_episodes=50,
+        num_steps=100,
+        gamma=0.7,
+        batch_size=32,
+        patience=10,
+        alpha=0.01,
+    ):
+        """Train the agent for a fixed number of episodes
+
+        Parameters
+        ----------
+        optimizer : str
+            The optimizer for training (e.g., `sgd`). By default it is set to `adam`.
+        loss_fn : str
+            The loss function. By default it is set to `mse`.
+        num_episodes : int
+            Total number of episodes to train. By default it is set to `50`.
+        num_steps : int
+            Steps per episode. By default it is set to `100`. If `num_steps` is less than `self.env.maxlen`, then the second will be chosen.
+        gamma : float
+            Discount factor. By default it is set to `0.7`.
+        batch_size : int
+            Size of training batches. By default it is set to `32`.
+        patience : int
+            How many episodes to wait for improvement.
+        alpha : float
+            Trade-off factor between loss and reward.
+        """
+        rewards = []
+        self.best_weights = None
+        self.best_loss = float("inf")
+
+        optimizers = {
+            "sgd": tf.keras.optimizers.SGD(),
+            "adam": tf.keras.optimizers.Adam(),
+            "adamw": tf.keras.optimizers.AdamW(),
+            "adadelta": tf.keras.optimizers.Adadelta(),
+            "rmsprop": tf.keras.optimizers.RMSprop(),
+        }
+        self.optimizer = optimizers[optimizer]
+        losses = {
+            "mse": tf.keras.losses.MeanSquaredError(),
+            "mae": tf.keras.losses.MeanAbsoluteError(),
+            "mape": tf.keras.losses.MeanAbsolutePercentageError(),
+        }
+        self.loss_fn = losses[loss_fn]
+        self.num_episodes = num_episodes
+        self.num_steps = num_steps if num_steps >= self.env.maxlen else self.env.maxlen
+        self.gamma = gamma
+        self.batch_size = batch_size
+        loss = float("inf")
+        no_improve_count = 0
+        best_combined_metric = float("inf")
+
+        for episode in range(self.num_episodes):
+            print_progress_bar(episode + 1, self.num_episodes)
+            self.env.reset()
+            sum_rewards = 0
+            epsilon = max(1 - episode / (self.num_episodes * 0.8), 0.01)
+
+            for step in range(self.num_steps):
+                state, action, reward, next_action, done = self.play_one_step(
+                    x_data[step], y_data[step], epsilon
+                )
+                sum_rewards += reward if isinstance(reward, int) else reward[0]
+
+                # Train if buffer has enough samples
+                if len(self.replay_buffer) > self.batch_size:
+                    loss = self._training_step()
+
+                if done:
+                    break
+
+            combined_metric = loss - alpha * sum_rewards
+
+            if combined_metric < best_combined_metric:
+                best_combined_metric = combined_metric
+                self.best_weights = self.model.get_weights()
+                self.best_loss = loss
+                no_improve_count = 0  # Reset counter on improvement
+            else:
+                no_improve_count += 1
+
+            rewards.append(sum_rewards)
+
+            # Logging
+            if episode % (self.num_episodes // 10) == 0:
+                print(
+                    f"Episode: {episode}, Steps: {step+1}, Epsilon: {epsilon:.3f}, Loss: {loss:.2e}, Reward: {sum_rewards}, No Improve Count: {no_improve_count}"
+                )
+
+            # Early stopping condition
+            if no_improve_count >= patience:
+                print(
+                    f"Early stopping at episode {episode} due to no improvement in {patience} episodes."
+                )
+                break
+
+        # Save best model
+        self.model.set_weights(self.best_weights)
+
+    def __str__(self):
+        return (
+            f"AutoQL (Env: {self.env.name}, Episodes: {self.num_episodes}, Steps: {self.num_steps})"
+        )
+
+
+if __name__ == "__main__":
+    pass
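The new rl.py module wraps a classifier in an Env that replays the model's own predictions and trains it with Q-learning through AutoQL. A rough usage sketch follows, assuming a toy dataset and a small softmax network as the Q-network; the data shapes, layer sizes, and argument values are illustrative and not taken from the package's documentation.

import numpy as np
import tensorflow as tf

from likelihood.models.deep.rl import AutoQL, Env

# Toy problem: 4-dimensional states and 3 possible actions (integer class labels).
x_data = np.random.rand(200, 4).astype(np.float32)
y_data = np.random.randint(0, 3, size=200)

# A small softmax network standing in for the Q-network / classifier.
q_model = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(4,)),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dense(3, activation="softmax"),
    ]
)

env = Env(q_model, maxlen=100)             # the environment scores the model's own predictions
agent = AutoQL(env, q_model, maxlen=2000)  # replay buffer of up to 2000 transitions
agent.train(x_data, y_data, num_episodes=20, num_steps=100, batch_size=32)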
likelihood/models/simulation.py
CHANGED
@@ -4,11 +4,15 @@ from typing import Dict, List, Tuple, Union
 
 import numpy as np
 import pandas as pd
+from packaging import version
 from pandas.core.frame import DataFrame
 
 from likelihood.tools import DataScaler, FeatureSelection, OneHotEncoder, cdf, check_nan_inf
 
-
+if version.parse(np.__version__) < version.parse("2.0.0"):
+    filter = np.RankWarning
+else:
+    filter = np.exceptions.RankWarning
 
 
 # --------------------------------------------------------------------------------------------------------------------------------------
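The hunk above keeps simulation.py working on both NumPy 1.x and 2.x, where RankWarning moved into np.exceptions. A standalone sketch of the same version guard; wrapping np.polyfit in warnings.catch_warnings is an assumed typical use and is not shown in this diff.

import warnings

import numpy as np
from packaging import version

# NumPy 2.0 relocated RankWarning; resolve the right class for either major version.
if version.parse(np.__version__) < version.parse("2.0.0"):
    rank_warning = np.RankWarning
else:
    rank_warning = np.exceptions.RankWarning

# Typical use: silence ill-conditioned polynomial fits such as a high-degree np.polyfit.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=rank_warning)
    coeffs = np.polyfit([0.0, 1.0, 2.0], [1.0, 1.0, 1.0], deg=9)  # deliberately over-parameterized
print(coeffs.shape)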
@@ -128,14 +132,15 @@ class SimulationEngine(FeatureSelection):
         )
         poly = kwargs.get("poly", 9)
         plot = kwargs.get("plot", False)
+        bandwidth = kwargs.get("bandwidth", 1.5)
         if not x[1]:
             media = self.df[key].mean()
             standard_deviation = self.df[key].std()
-            lower_limit = media -
-            upper_limit = media +
+            lower_limit = media - bandwidth * standard_deviation
+            upper_limit = media + bandwidth * standard_deviation
             if plot:
                 print(f"Cumulative Distribution Function ({key})")
-                f,
+                f, _, ox = cdf(x[0].flatten(), poly=poly, plot=plot)
             else:
                 f, ox = None, None
             least_frequent_category, most_frequent_category = categories_by_quartile(