likelihood-1.4.1-py3-none-any.whl → likelihood-1.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
likelihood/tools/tools.py CHANGED
@@ -169,7 +169,6 @@ def generate_feature_yaml(
     return feature_info


-# a function that calculates the percentage of missing values per column is defined
 def cal_missing_values(df: DataFrame) -> None:
     """Calculate the percentage of missing (`NaN`/`NaT`) values per column in a dataframe.

@@ -180,8 +179,7 @@ def cal_missing_values(df: DataFrame) -> None:

     Returns
     -------
-    `None`
-        Prints out a table with columns as index and percentages of missing values as data.
+    `None` : Prints out a table with columns as index and percentages of missing values as data.
     """

     col = df.columns
@@ -226,7 +224,6 @@ def cdf(
     cdf_values = np.cumsum(x) / np.sum(x)
     sorted_x = np.sort(x)

-    # Calculate the CDF or inverse CDF (quantile function)
     probabilities = np.linspace(0, 1, len(sorted_x))

     if inv:
@@ -281,7 +278,6 @@ def calculate_probability(x: np.ndarray, points: int = 1, cond: bool = True) ->
     fit, _, sorted_x = cdf(x)
     p = fit(x)

-    # Validate probability values
     if cond:
         prob_value = np.prod(p[-points])
         message = "product"
@@ -304,7 +300,7 @@ class CorrelationBase:

     def __init__(self, x: np.ndarray, y: Union[np.ndarray, None] = None):
         self.x = x
-        self.y = y if y is not None else x  # Default to autocorrelation if y is not provided
+        self.y = y if y is not None else x
         self._compute_correlation()
         self.z = self.result[self.result.size // 2 :]
         self.z /= np.abs(self.z).max()
@@ -395,7 +391,6 @@ def fft_denoise(
     denoised_dataset = np.zeros_like(dataset)
     periods = np.zeros(num_samples)

-    # Precompute values that do not change within the loop
     freq = (1 / n_points) * np.arange(n_points)
     L = np.arange(1, np.floor(n_points / 2), dtype=int)

@@ -405,15 +400,12 @@ def fft_denoise(
         threshold = np.mean(PSD) + sigma * np.std(PSD)
         indices = PSD > threshold

-        # Zero out all others in frequency domain
         PSDclean = PSD * indices
         fhat_cleaned = fhat * indices

-        # Inverse FFT for filtered time signal
         denoised_signal = np.fft.ifft(fhat_cleaned).real
         denoised_dataset[i, :] = denoised_signal

-        # Calculate the period of the signal
         peak_index = L[np.argmax(np.abs(fhat[L]))]
         periods[i] = 1 / (2 * freq[peak_index])

@@ -430,33 +422,27 @@ def get_period(dataset: np.ndarray) -> float:
     Parameters
     ----------
     dataset : `ndarray`
-        the `dataset` describing the function over which the period is calculated
+        the `dataset` describing the function over which the period is calculated.

     Returns
     -------
     period : `float`
-        period of the function described by the `dataset`
+        period of the function described by the `dataset`.
     """
     n = dataset.size

-    # Ensure there are enough points for FFT analysis
     if n < 2:
         raise ValueError("Dataset must contain at least two points.")

-    # Compute the FFT and PSD
-    fhat = np.fft.rfft(dataset)  # Use rfft for real-valued input to save computation
-    freqs = np.fft.rfftfreq(n)  # Get only positive frequencies
+    fhat = np.fft.rfft(dataset)
+    freqs = np.fft.rfftfreq(n)

-    # Calculate the Power Spectral Density (PSD)
     PSD = np.abs(fhat) ** 2 / n

-    # Remove the first frequency component (DC component)
     PSD[0] = 0

-    # Find the index of the maximum PSD value, excluding the DC component
     max_psd_index = np.argmax(PSD)

-    # Calculate the period based on the corresponding frequency
     dominant_freq = freqs[max_psd_index]
     if dominant_freq == 0:
         raise ValueError("No significant periodic component found in the dataset.")
@@ -472,12 +458,12 @@ def sigmoide_inv(y: float) -> float:
     Parameters
     ----------
     y : `float`
-        the number to evaluate the function
+        the number to evaluate the function.

     Returns
     -------
     `float`
-        value of evaluated function
+        value of evaluated function.
     """

     return math.log(y / (1 - y))
@@ -540,6 +526,10 @@ class LogisticRegression:
         datapoints : `np.array`
            An array containing the values of the independent variable.

+        Returns
+        -------
+        `np.array`
+
         """
         sig = np.vectorize(sigmoide)

@@ -558,8 +548,6 @@ class LogisticRegression:
         -------
         importance : `np.array`
             An array containing the importance of each feature.
-
-
         """
         if print_important_features:
             for i, a in enumerate(self.importance):
@@ -589,9 +577,7 @@ class LinearRegression:

         Returns
         -------
-        importance : `np.array`
-            An array containing the importance of each feature.
-
+        `None` : The function doesn't return anything.
         """

         self.X = dataset
@@ -635,8 +621,6 @@ class LinearRegression:
         -------
         importance : `np.array`
             An array containing the importance of each feature.
-
-
         """
         if print_important_features:
             for i, a in enumerate(self.importance):
@@ -658,7 +642,6 @@ def cal_average(y: np.ndarray, alpha: float = 1):
     -------
     average : `float`
         The average of the data.
-
     """

     n = int(alpha * len(y))
@@ -799,7 +782,6 @@ def mean_square_error(y_true: np.ndarray, y_pred: np.ndarray, print_error: bool
     -------
     RMSE : `float`
         The Root Mean Squared Error.
-
     """
     if print_error:
         print(f"The RMSE is {np.sqrt(np.mean((y_true - y_pred)**2))}")
@@ -975,7 +957,6 @@ class PerformanceMeasures:
     def __init__(self) -> None:
         pass

-    # Performance measure Res_T
     def f_mean(self, y_true: np.ndarray, y_pred: np.ndarray, labels: List[int]) -> float:
         F_vec = self._f1_score(y_true, y_pred, labels)
         mean_f_measure = np.mean(F_vec)
@@ -988,7 +969,6 @@ class PerformanceMeasures:

         return mean_f_measure

-    # Performance measure Res_P
     def resp(self, y_true: np.ndarray, y_pred: np.ndarray, labels: List[int]) -> float:
         T_C = len(y_true)
         sum1, sum2 = 0.0, 0.0
@@ -999,7 +979,7 @@ class PerformanceMeasures:
             sum1 += (1 - class_instances) * F_vec[label_idx]
             sum2 += 1 - class_instances

-        res_p = sum1 / sum2 if sum2 != 0 else 0.0  # Avoid division by zero
+        res_p = sum1 / sum2 if sum2 != 0 else 0.0
         print(f"Metric Res_p -> {res_p}")

         return res_p
@@ -1016,7 +996,6 @@ class PerformanceMeasures:
         sum_cols = np.sum(count_mat, axis=0)
         sum_rows = np.sum(count_mat, axis=1)

-        # Avoid division by zero
         precision = np.divide(
             count_mat.diagonal(), sum_cols, out=np.zeros_like(sum_cols), where=sum_cols != 0
         )
@@ -1028,7 +1007,6 @@ class PerformanceMeasures:

         return f1_vec

-    # Returns confusion matrix of predictions
     def _confu_mat(self, y_true: np.ndarray, y_pred: np.ndarray, labels: List[int]) -> np.ndarray:
         num_classes = len(labels)
         label_mapping = {label: idx for idx, label in enumerate(labels)}
@@ -1056,21 +1034,18 @@ class OneHotEncoder:
         self.x = x

         if not isinstance(self.x, np.ndarray):
-            self.x = np.array(self.x)  # If not numpy array then convert it
+            self.x = np.array(self.x)

-        y = np.zeros(
-            (self.x.size, self.x.max() + 1)
-        )  # Build matrix of (size num of entries) x (max value + 1)
+        y = np.zeros((self.x.size, self.x.max() + 1))

-        y[np.arange(self.x.size), self.x] = 1  # Label with ones
+        y[np.arange(self.x.size), self.x] = 1

         return y

     def decode(self, x: np.ndarray | list) -> np.ndarray:
         if not isinstance(x, np.ndarray):
-            x = np.array(x)  # If not numpy array then convert it
+            x = np.array(x)

-        # We return the max values of each row
         y = np.argmax(x, axis=1)

         return y
@@ -1107,13 +1082,11 @@ class FeatureSelection:
         `str`
             A string representation of the directed graph.
         """
-        # Assign and clean dataset
         self._load_data(dataset)

         curr_dataset = self.X
         columns = list(curr_dataset.columns)

-        # We construct string from causal_graph
         feature_string = " digraph { "
         for column in columns:
             feature_string += column + "; "
@@ -1125,85 +1098,53 @@ class FeatureSelection:
         numeric_df = pd.DataFrame(numeric_scaled.T, columns=numeric_df.columns)
         curr_dataset[numeric_df.columns] = numeric_df

-        # We construct dictionary to save index for scaling
         numeric_dict = dict(zip(list(numeric_df.columns), range(len(list(numeric_df.columns)))))

-        # Iterate over all the columns to obtain their importances.
         for index_column, column in enumerate(columns):
-
-            # Variable to predict
             Y = curr_dataset[column]
-
-            # We check whether it is numerical or categorical.
             column_type = Y.dtype
             if column_type != "object":
-                # Linear regression model
                 Model = LinearRegression()
-
-                # Auxiliary dataset without the column in question
                 X_aux = curr_dataset.drop([column], axis=1)
-
-                # We encode
                 dfe = DataFrameEncoder(X_aux)
                 encoded_df = dfe.encode(save_mode=False)
-                # We train
                 Model.fit(encoded_df.to_numpy().T, Y.to_numpy().T)
-                # We obtain importance
                 importance = Model.get_importances()
                 w = Model.w
             else:
                 Model = LogisticRegression()
                 num_unique_entries = curr_dataset[column].nunique()
-
                 quick_encoder = DataFrameEncoder(Y.to_frame())
                 encoded_Y = quick_encoder.encode(save_mode=False)
-
-                # Mapping to one-hot
                 one_hot = OneHotEncoder()
                 train_y = one_hot.encode(encoded_Y[column])
-                # PASSING 0 -> 0.5 and 1 -> 0.73105
                 for i in range(len(train_y)):
                     for j in range(num_unique_entries):
                         if train_y[i][j] == 1.0:
                             train_y[i][j] = 0.73105
                         else:
                             train_y[i][j] = 0.5
-
-                # Delete the column in question
                 X_aux = curr_dataset.drop([column], axis=1)
-
-                # We encode
                 dfe = DataFrameEncoder(X_aux)
                 encoded_df = dfe.encode(save_mode=False)
-
-                # We train
                 Model.fit(encoded_df.to_numpy().T, train_y)
-
-                # We obtain importance
                 importance = Model.get_importances()
                 w = Model.w
-
-            # We obtain the $n$ most important ones
             top_n_indexes = sorted(
                 range(len(importance)), key=lambda i: importance[i], reverse=True
             )[:n_importances]

-            # We build the string for the column in question
             names_cols = list(X_aux.columns)
-            # We store the indices, values and column names in a list of tuples.
             features_imp_node = [
                 (names_cols[top_n_indexes[i]], importance[top_n_indexes[i]])
                 for i in range(n_importances)
             ]
-            # We store w's for predictions

             if column_type != "object":
                 self.w_dict[column] = (w, None, names_cols, dfe, numeric_dict)
             else:
                 self.w_dict[column] = (w, quick_encoder, names_cols, dfe, numeric_dict)
-            # Add to general list
             self.all_features_imp_graph.append((column, features_imp_node))
-            # We format it
             for i in top_n_indexes:
                 feature_string += names_cols[i] + " -> "

@@ -1212,10 +1153,8 @@ class FeatureSelection:
         return feature_string + "} "

     def _load_data(self, dataset: DataFrame):
-        # Assign data and clean dataset of unneeded columns

         if len(self.not_features) > 0:
-            # We remove unnecessary columns
             self.X = dataset.drop(columns=self.not_features)

         else:
@@ -1232,11 +1171,15 @@ def check_nan_inf(df: DataFrame) -> DataFrame:
    """
    Checks for NaN and Inf values in the DataFrame. If any are found, they will be removed.

-    Parameters:
-        df (DataFrame): The input DataFrame to be checked.
+    Parameters
+    ----------
+    df : DataFrame
+        The input DataFrame to be checked.

-    Returns:
-        DataFrame: A new DataFrame with NaN and Inf values removed.
+    Returns
+    ----------
+    DataFrame
+        A new DataFrame with NaN and Inf values removed.
    """

    nan_values = df.isnull().values.any()
@@ -1272,7 +1215,6 @@ if __name__ == "__main__":
     print(helper.f_mean(y_true, y_pred, labels))

     # Use DataFrameEncoder
-    # Create a DataFrame
     data = {"Name": ["John", "Alice", "Bob", "Jafet", "Beto"], "Age": [25, 30, 35, 21, 28]}
     import pandas as pd

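For orientation only, here is a minimal usage sketch (not part of the diff) exercising two of the helpers touched above, cal_missing_values and get_period. The import path likelihood.tools.tools is assumed from the RECORD listing further below, and the example values are hypothetical.

import numpy as np
import pandas as pd

# Assumed import path; the RECORD section below lists likelihood/tools/tools.py.
from likelihood.tools.tools import cal_missing_values, get_period

# cal_missing_values prints the percentage of NaN/NaT values per column and returns None.
df = pd.DataFrame({"age": [25.0, None, 35.0], "income": [50000.0, 62000.0, None]})
cal_missing_values(df)

# get_period estimates the dominant period of a sampled signal from its rfft-based
# power spectrum (see the hunk above); it raises ValueError for fewer than two points
# or when no significant periodic component is found.
signal = np.sin(2 * np.pi * np.arange(1000) / 50.0)  # sinusoid with a 50-sample period
print(get_period(signal))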
likelihood-1.4.1.dist-info/METADATA → likelihood-1.5.0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: likelihood
-Version: 1.4.1
+Version: 1.5.0
 Summary: A package that performs the maximum likelihood algorithm.
 Home-page: https://github.com/jzsmoreno/likelihood/
 Author: J. A. Moreno-Guerra
likelihood-1.4.1.dist-info/RECORD → likelihood-1.5.0.dist-info/RECORD
@@ -2,20 +2,21 @@ likelihood/__init__.py,sha256=5C0hapdsk85XZhN_rssRAEFpkRRuKNtj6cyRbqD2_gM,994
 likelihood/main.py,sha256=fcCkGOOWKjfvw2tLVqjuKPV8t0rVCIT9FlbYcOv4EYo,7974
 likelihood/graph/__init__.py,sha256=6TuFDfmXTwpLyHl7_KqBfdzW6zqHjGzIFvymjFPlvjI,21
 likelihood/graph/graph.py,sha256=bLrNMvIh7GOTdPTwnNss8oPZ7cbSHQScAsH_ttmVUK0,3294
-likelihood/graph/nn.py,sha256=MD2M-KgQnrlHg3iS42vrdOnD51-GRk3CJ5CCMQ0DNWI,10763
+likelihood/graph/nn.py,sha256=EaMmboKriCFnkP48_HLGRAsOZSWxwUlMG0WDGZ4ey1o,11035
 likelihood/models/__init__.py,sha256=e6nB4w47w0Q9DrAFeP3OcUgcoHOtf7Il4mBhgf4AARg,52
 likelihood/models/hmm.py,sha256=0s0gFySH1u4NjRaZDxiZ8oeTaFhFrw1x0GJxwy3dFrA,6253
 likelihood/models/regression.py,sha256=9cakyGlJCEO6WfpoKLh3GxdXQeQp7cUvJIkQ5odT0TA,9404
 likelihood/models/simulation.py,sha256=LFyE_szo7sDukviMLeg_6RoyAaI7yMXUy8f4mDOrGoc,8460
 likelihood/models/utils.py,sha256=dvigPi_hxcs5ntfHr7Y1JvP5ULtMW3kkN0nJpS4orE8,1319
 likelihood/models/deep/__init__.py,sha256=-KIPippVaMqgG8mEgYjNxYQdqOUcFhUuKhbVe8TTCfo,28
-likelihood/models/deep/autoencoders.py,sha256=O-H5KLmJvYjuE-b6l97esruihK6djocgxbkO2N1X2RM,39306
+likelihood/models/deep/autoencoders.py,sha256=0EIZwDNlZ9NCfQbhQ_KdXkkRwIjUEU-jk0l0u-J1wmA,44212
 likelihood/tools/__init__.py,sha256=N1IhMDzacsGQT2MIYBMBC0zTxes78vC_0gGrwkuPgmg,78
-likelihood/tools/models_tools.py,sha256=bjwoBlDeW1fUi58yJsuKcaTUTgWhOCNsc24_ESYI3BI,3502
+likelihood/tools/figures.py,sha256=waF0NHIMrctCmaLhcuz5DMcXyRKynmn6aG0XITYCTLc,10940
+likelihood/tools/models_tools.py,sha256=c3-vac-1MYSarYDtfR6XfVC7X_WY9auS7y2_3Z973IQ,8875
 likelihood/tools/numeric_tools.py,sha256=FA44kbiAcxcquz1el_g3Pqsp5ii8XFkAIrsMs5bGkj0,11445
-likelihood/tools/tools.py,sha256=6JLZBHxc4f1lJfw4aBwdS2s16EpydFNqLZF73I7wddQ,44412
-likelihood-1.4.1.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
-likelihood-1.4.1.dist-info/METADATA,sha256=6otKXhthH5ZSUvYfcghD6CaC1skWZ0FBouXsGXuJfZw,2822
-likelihood-1.4.1.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
-likelihood-1.4.1.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
-likelihood-1.4.1.dist-info/RECORD,,
+likelihood/tools/tools.py,sha256=SePaBg-gP29rt5SR2xhqNNQLu7_m0Wner5y_XzdSdpc,42031
+likelihood-1.5.0.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
+likelihood-1.5.0.dist-info/METADATA,sha256=zTpqZ3w7y_vWY2dqQH7JSfROIkC8dbRcLn2LSCAQGc4,2822
+likelihood-1.5.0.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
+likelihood-1.5.0.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
+likelihood-1.5.0.dist-info/RECORD,,
likelihood-1.4.1.dist-info/WHEEL → likelihood-1.5.0.dist-info/WHEEL
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.8.2)
+Generator: setuptools (76.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
