likelihood 1.2.23.tar.gz → 1.2.24.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {likelihood-1.2.23 → likelihood-1.2.24}/PKG-INFO +1 -1
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/graph/nn.py +6 -5
- likelihood-1.2.24/likelihood/models/hmm.py +163 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/models/simulation.py +5 -6
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood.egg-info/PKG-INFO +1 -1
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood.egg-info/SOURCES.txt +1 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/LICENSE +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/README.md +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/__init__.py +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/graph/__init__.py +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/graph/graph.py +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/main.py +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/models/__init__.py +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/models/deep/__init__.py +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/models/deep/autoencoders.py +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/models/regression.py +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/models/utils.py +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/tools/__init__.py +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/tools/numeric_tools.py +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/tools/tools.py +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood.egg-info/dependency_links.txt +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood.egg-info/requires.txt +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/likelihood.egg-info/top_level.txt +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/setup.cfg +0 -0
- {likelihood-1.2.23 → likelihood-1.2.24}/setup.py +0 -0
{likelihood-1.2.23 → likelihood-1.2.24}/likelihood/graph/nn.py

@@ -1,6 +1,8 @@
 import os
 
 os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
+# Suppress TensorFlow INFO logs
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
 import logging
 import warnings
 from typing import List, Tuple
@@ -9,7 +11,6 @@ import numpy as np
 import pandas as pd
 import tensorflow as tf
 from IPython.display import clear_output
-from numpy import ndarray
 from pandas.core.frame import DataFrame
 from sklearn.metrics import f1_score
 from sklearn.model_selection import train_test_split
@@ -21,7 +22,7 @@ logging.getLogger("tensorflow").setLevel(logging.ERROR)
 tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
 
 
-def compare_similarity(arr1: ndarray, arr2: ndarray) -> int:
+def compare_similarity(arr1: np.ndarray, arr2: np.ndarray) -> int:
     """Compares the similarity between two arrays of categories.
 
     Parameters
@@ -44,9 +45,9 @@ def compare_similarity(arr1: ndarray, arr2: ndarray) -> int:
     return count
 
 
-def
+def cal_adjacency_matrix(
     df: DataFrame, exclude_subset: List[str] = [], sparse: bool = True, **kwargs
-) -> Tuple[dict, ndarray]:
+) -> Tuple[dict, np.ndarray]:
     """Calculates the adjacency matrix for a given DataFrame.
     The adjacency matrix is a matrix that represents the similarity between each pair of categories.
     The similarity is calculated using the `compare_similarity` function.
@@ -133,7 +134,7 @@ class Data:
         target: str | None = None,
         exclude_subset: List[str] = [],
     ):
-        _, adjacency =
+        _, adjacency = cal_adjacency_matrix(df, exclude_subset=exclude_subset, sparse=True)
         if target is not None:
             X = df.drop(columns=[target] + exclude_subset)
         else:
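The only behavioral change in nn.py is the additional TF_CPP_MIN_LOG_LEVEL setting; the remaining edits replace the bare ndarray import with np.ndarray annotations and adjust the cal_adjacency_matrix signature. The following is a minimal sketch of calling the updated function, assuming it is importable as likelihood.graph.nn (inferred from the file path); the toy DataFrame and its column names are illustrative only and not taken from the package.

import pandas as pd
from likelihood.graph.nn import cal_adjacency_matrix  # module path inferred from likelihood/graph/nn.py

# Illustrative categorical data (column names are made up for this sketch)
df = pd.DataFrame(
    {
        "color": ["red", "blue", "red", "green"],
        "shape": ["circle", "circle", "square", "square"],
    }
)

# Call shape mirrors how Data.__init__ invokes it in this diff:
# returns a dict plus an np.ndarray adjacency matrix per the new annotation
adjacency_dict, adjacency = cal_adjacency_matrix(df, exclude_subset=[], sparse=True)
print(adjacency.shape)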
likelihood-1.2.24/likelihood/models/hmm.py (new file)

@@ -0,0 +1,163 @@
+import logging
+import os
+import pickle
+from typing import List, Tuple
+
+import numpy as np
+from IPython.display import clear_output
+
+
+class HMM:
+    def __init__(self, n_states: int, n_observations: int):
+        self.n_states = n_states
+        self.n_observations = n_observations
+
+        # Initialize parameters with random values
+        self.pi = np.random.dirichlet(np.ones(n_states), size=1)[0]
+        self.A = np.random.dirichlet(np.ones(n_states), size=n_states)
+        self.B = np.random.dirichlet(np.ones(n_observations), size=n_states)
+
+    def save_model(self, filename: str = "./hmm") -> None:
+        filename = filename if filename.endswith(".pkl") else filename + ".pkl"
+        with open(filename, "wb") as f:
+            pickle.dump(self, f)
+
+    @staticmethod
+    def load_model(filename: str = "./hmm") -> "HMM":
+        filename = filename + ".pkl" if not filename.endswith(".pkl") else filename
+        with open(filename, "rb") as f:
+            return pickle.load(f)
+
+    def forward(self, sequence: List[int]) -> np.ndarray:
+        T = len(sequence)
+        alpha = np.zeros((T, self.n_states))
+
+        # Add a small constant (smoothing) to avoid log(0)
+        epsilon = 1e-10  # Small value to avoid taking log(0)
+
+        # Initialization (log-space)
+        alpha[0] = np.log(self.pi + epsilon) + np.log(self.B[:, sequence[0]] + epsilon)
+        alpha[0] -= np.log(np.sum(np.exp(alpha[0])))  # Normalization (log-space)
+
+        # Recursion (log-space)
+        for t in range(1, T):
+            for i in range(self.n_states):
+                alpha[t, i] = np.log(
+                    np.sum(np.exp(alpha[t - 1] + np.log(self.A[:, i] + epsilon)))
+                ) + np.log(self.B[i, sequence[t]] + epsilon)
+            alpha[t] -= np.log(np.sum(np.exp(alpha[t])))  # Normalization
+
+        return alpha
+
+    def backward(self, sequence: List[int]) -> np.ndarray:
+        T = len(sequence)
+        beta = np.ones((T, self.n_states))
+
+        # Backward recursion
+        for t in range(T - 2, -1, -1):
+            for i in range(self.n_states):
+                beta[t, i] = np.sum(self.A[i] * self.B[:, sequence[t + 1]] * beta[t + 1])
+
+        return beta
+
+    def viterbi(self, sequence: List[int]) -> np.ndarray:
+        T = len(sequence)
+        delta = np.zeros((T, self.n_states))
+        psi = np.zeros((T, self.n_states), dtype=int)
+
+        # Initialization
+        delta[0] = self.pi * self.B[:, sequence[0]]
+
+        # Recursion
+        for t in range(1, T):
+            for i in range(self.n_states):
+                delta[t, i] = np.max(delta[t - 1] * self.A[:, i]) * self.B[i, sequence[t]]
+                psi[t, i] = np.argmax(delta[t - 1] * self.A[:, i])
+
+        # Reconstruct the most probable path
+        state_sequence = np.zeros(T, dtype=int)
+        state_sequence[T - 1] = np.argmax(delta[T - 1])
+        for t in range(T - 2, -1, -1):
+            state_sequence[t] = psi[t + 1, state_sequence[t + 1]]
+
+        return state_sequence
+
+    def baum_welch(
+        self, sequences: List[List[int]], n_iterations: int, verbose: bool = False
+    ) -> None:
+        for iteration in range(n_iterations):
+            # Initialize accumulators
+            A_num = np.zeros((self.n_states, self.n_states))
+            B_num = np.zeros((self.n_states, self.n_observations))
+            pi_num = np.zeros(self.n_states)
+
+            for sequence in sequences:
+                T = len(sequence)
+                alpha = self.forward(sequence)
+                beta = self.backward(sequence)
+
+                # Update pi
+                gamma = (alpha * beta) / np.sum(alpha * beta, axis=1, keepdims=True)
+                pi_num += gamma[0]
+
+                # Update A and B
+                for t in range(T - 1):
+                    xi = np.zeros((self.n_states, self.n_states))
+                    denom = np.sum(alpha[t] * self.A * self.B[:, sequence[t + 1]] * beta[t + 1])
+
+                    for i in range(self.n_states):
+                        for j in range(self.n_states):
+                            xi[i, j] = (
+                                alpha[t, i]
+                                * self.A[i, j]
+                                * self.B[j, sequence[t + 1]]
+                                * beta[t + 1, j]
+                            ) / denom
+                        A_num[i] += xi[i]
+
+                    B_num[:, sequence[t]] += gamma[t]
+
+                # For the last step of the sequence
+                B_num[:, sequence[-1]] += gamma[-1]
+
+            # Normalize and update parameters
+            self.pi = pi_num / len(sequences)
+            self.A = A_num / np.sum(A_num, axis=1, keepdims=True)
+            self.B = B_num / np.sum(B_num, axis=1, keepdims=True)
+
+            # Logging parameters every 10 iterations
+            if iteration % 10 == 0 and verbose:
+                os.system("cls" if os.name == "nt" else "clear")
+                clear_output(wait=True)
+                logging.info(f"Iteration {iteration}:")
+                logging.info("Pi: %s", self.pi)
+                logging.info("A:\n%s", self.A)
+                logging.info("B:\n%s", self.B)
+
+    def decoding_accuracy(self, sequences: List[List[int]], true_states: List[List[int]]) -> float:
+        correct_predictions = 0
+        total_predictions = 0
+
+        for sequence, true_state in zip(sequences, true_states):
+            predicted_states = self.viterbi(sequence)
+            correct_predictions += np.sum(predicted_states == true_state)
+            total_predictions += len(sequence)
+
+        accuracy = (correct_predictions / total_predictions) * 100
+        return accuracy
+
+    def state_probabilities(self, sequence: List[int]) -> np.ndarray:
+        """
+        Returns the smoothed probabilities of the hidden states at each time step.
+        This is done by using both forward and backward probabilities.
+        """
+        alpha = self.forward(sequence)
+        beta = self.backward(sequence)
+
+        # Compute smoothed probabilities (gamma)
+        smoothed_probs = (alpha * beta) / np.sum(alpha * beta, axis=1, keepdims=True)
+
+        return smoothed_probs
+
+    def sequence_probability(self, sequence: List[int]) -> np.ndarray:
+        return self.state_probabilities(sequence)[-1]
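hmm.py is the new module in this release: a discrete hidden Markov model with Dirichlet-initialized parameters (pi, A, B), forward/backward passes, Viterbi decoding, Baum-Welch re-estimation, decoding accuracy, and pickle-based persistence. Below is a minimal usage sketch, assuming the class is importable as likelihood.models.hmm (inferred from the file path; the sequences are made-up toy data).

import numpy as np
from likelihood.models.hmm import HMM  # import path inferred from likelihood/models/hmm.py

# Two hidden states, three observation symbols coded as integers 0..2
model = HMM(n_states=2, n_observations=3)

# Training sequences are lists of integer-coded observations
sequences = [[0, 1, 2, 1, 0], [2, 2, 1, 0, 0]]
model.baum_welch(sequences, n_iterations=50)

# Viterbi decoding returns the most probable hidden-state path
states = model.viterbi([0, 1, 2, 2])

# Smoothed per-step state probabilities, plus pickle persistence
probs = model.state_probabilities([0, 1, 2, 2])
model.save_model("./hmm")           # writes ./hmm.pkl
restored = HMM.load_model("./hmm")  # reads ./hmm.pkl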
{likelihood-1.2.23 → likelihood-1.2.24}/likelihood/models/simulation.py

@@ -5,7 +5,6 @@ from typing import List, Tuple, Union
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-from numpy import ndarray
 from pandas.core.frame import DataFrame
 
 from likelihood.tools import DataScaler, FeatureSelection, OneHotEncoder, cdf, check_nan_inf
@@ -66,12 +65,12 @@ class SimulationEngine(FeatureSelection):
 
         super().__init__(**kwargs)
 
-    def predict(self, df: DataFrame, column: str) -> ndarray | list:
+    def predict(self, df: DataFrame, column: str) -> np.ndarray | list:
         # Let us assign the dictionary entries corresponding to the column
         w, quick_encoder, names_cols, dfe, numeric_dict = self.w_dict[column]
 
         df = df[names_cols].copy()
-        # Change the scale of the
+        # Change the scale of the DataFrame
         dataset = self.df.copy()
         dataset.drop(columns=column, inplace=True)
         numeric_df = dataset.select_dtypes(include="number")
@@ -85,7 +84,7 @@ class SimulationEngine(FeatureSelection):
         for col in numeric_df.columns:
            df[col] = numeric_df[col].values
 
-        # Encoding the
+        # Encoding the DataFrame
         for num, colname in enumerate(dfe._encode_columns):
             if df[colname].dtype == "object":
                 encode_dict = dfe.encoding_list[num]
@@ -93,7 +92,7 @@ class SimulationEngine(FeatureSelection):
                     dfe._code_transformation_to, dictionary_list=encode_dict
                 )
 
-        #
+        # Prediction
         y = df.to_numpy() @ w
 
         # Categorical column
@@ -113,7 +112,7 @@ class SimulationEngine(FeatureSelection):
 
         return y[:]
 
-    def _encode(self, df: DataFrame) -> ndarray | list:
+    def _encode(self, df: DataFrame) -> np.ndarray | list:
         df = df.copy()
         column = df.columns[0]
         frec = df[column].value_counts() / len(df)
The remaining files listed above (+0 -0) are unchanged between 1.2.23 and 1.2.24.