PyPI - likelihood - Versions diffs - 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl - Mend

likelihood-2.0.0-py3-none-any.whl → likelihood-2.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

likelihood/models/deep/rl.py CHANGED Viewed

@@ -27,12 +27,12 @@ class Env:
         Parameters
         ----------
-            model : Any
-                Model with `.predict()` method (e.g., Keras model).
-            maxlen : int
-                Maximum length of deque. By default it is set to `100`.
-            name : str
-                The name of the environment. By default it is set to `likenasium`.
+        model : Any
+            Model with `.predict()` method (e.g., Keras model).
+        maxlen : int
+            Maximum length of deque. By default it is set to `100`.
+        name : str
+            The name of the environment. By default it is set to `likenasium`.
         """
         self.model = model
         self.maxlen = maxlen
@@ -49,14 +49,14 @@ class Env:
         Parameters
         ----------
-            state : `np.ndarray`
-                Current state to process (input to the model).
-            action : int
-                Expected action to process.
+        state : `np.ndarray`
+            Current state to process (input to the model).
+        action : `int`
+            Expected action to process.
         Returns
         -------
-            tuple: (current_state, action_pred, reward, next_action, done)
+            `tuple` : (current_state, action_pred, reward, next_action, done)
         """
         if self.done:
             return None, None, 0, None, True
@@ -120,9 +120,9 @@ class AutoQL:
         Parameters
         ----------
-        env : Any
+        env : `Any`
             The environment to interact with
-        model : tf.keras.Model
+        model : `tf.keras.Model`
             The Q-network model
         """
@@ -137,16 +137,16 @@ class AutoQL:
         Parameters
         ----------
-            state : `np.ndarray`
-                Current state.
-            action : int
-                Expected action to process.
-            epsilon : float
-                Exploration probability. By default it is set to `0`
+        state : `np.ndarray`
+            Current state.
+        action : `int`
+            Expected action to process.
+        epsilon : `float`
+            Exploration probability. By default it is set to `0`
         Returns
         -------
-            tuple: (state, action, reward, next_action, done)
+            `tuple` : (state, action, reward, next_action, done)
         """
         current_state, value, reward, next_action, done = self.env.step(state, action)
@@ -164,17 +164,17 @@ class AutoQL:
         Parameters
         ----------
-            state : `np.ndarray`
-                Current state
-            action : int
-                Expected action to process.
+        state : `np.ndarray`
+            Current state
+        action : `int`
+            Expected action to process.
-            epsilon : float
-                Exploration probability.
+        epsilon : `float`
+            Exploration probability.
         Returns
         -------
-            tuple: (state, action, reward, next_action, done)
+            `tuple` : (state, action, reward, next_action, done)
         """
         current_state, greedy_action, reward, next_action, done = self.epsilon_greedy_policy(
             state, action, epsilon
@@ -202,7 +202,7 @@ class AutoQL:
         Returns
         -------
-            float: Training loss
+            `float` : Training loss
         """
         batch_ = random.sample(self.replay_buffer, self.batch_size)
@@ -250,21 +250,21 @@ class AutoQL:
         Parameters
         ----------
-        optimizer : str
+        optimizer : `str`
             The optimizer for training (e.g., `sgd`). By default it is set to `adam`.
-        loss_fn : str
+        loss_fn : `str`
             The loss function. By default it is set to `mse`.
-        num_episodes : int
+        num_episodes : `int`
             Total number of episodes to train. By default it is set to `50`.
-        num_steps : int
+        num_steps : `int`
             Steps per episode. By default it is set to `100`. If `num_steps` is less than `self.env.maxlen`, then `self.env.maxlen` is used instead.
-        gamma : float
+        gamma : `float`
             Discount factor. By default it is set to `0.7`.
-        batch_size : int
+        batch_size : `int`
             Size of training batches. By default it is set to `32`.
-        patience : int
+        patience : `int`
             How many episodes to wait for improvement.
-        alpha : float
+        alpha : `float`
             Trade-off factor between loss and reward.
         """
         rewards = []

likelihood/tools/models_tools.py CHANGED Viewed

@@ -11,7 +11,7 @@ logging.getLogger("tensorflow").setLevel(logging.ERROR)
 import sys
 import warnings
 from functools import wraps
-from typing import Dict
+from typing import Dict, List, Optional, Tuple, Union
 import numpy as np
 import tensorflow as tf
@@ -40,6 +40,189 @@ def suppress_warnings(func):
     return wrapper
+class TransformRange:
+    """
+    Build range-labelled (binned) versions of numeric DataFrame columns.
+    Each selected column is cut into fixed-width, left-closed bins whose labels
+    are strings of the form ``"<low>-<high>"`` (e.g. ``"0-4"``, ``"5-9"``).
+    The DataFrame passed to the constructor is copied and never modified.
+    """
+    def __init__(self, df: pd.DataFrame) -> None:
+        """Store a private copy of the DataFrame to transform.
+        Parameters
+        ----------
+        df : `pd.DataFrame`
+            The original DataFrame to transform.
+        Raises
+        ------
+        TypeError
+            If df is not a pandas DataFrame.
+        """
+        if not isinstance(df, pd.DataFrame):
+            raise TypeError("df must be a pandas DataFrame")
+        self.df = df.copy()  # Work on a copy so the caller's frame is untouched
+    def _create_bins_and_labels(
+        self, min_val: Union[int, float], max_val: Union[int, float], bin_size: int
+    ) -> Tuple[np.ndarray, List[str]]:
+        """
+        Create the bin edges covering ``[min_val, max_val]`` and their labels.
+        Parameters
+        ----------
+        min_val : `int` or `float`
+            The minimum value for the range.
+        max_val : `int` or `float`
+            The maximum value for the range.
+        bin_size : `int`
+            The size of each bin.
+        Returns
+        -------
+        bins : `np.ndarray`
+            The bin edges. Bins are meant to be used left-closed, i.e. bin ``i``
+            covers ``[bins[i], bins[i + 1])``.
+        labels : `list`
+            One ``"<low>-<high>"`` label per bin, where ``high`` is the upper
+            edge minus one.
+        Raises
+        ------
+        ValueError
+            If bin_size is not positive or if min_val >= max_val.
+        """
+        if bin_size <= 0:
+            raise ValueError("bin_size must be positive")
+        if min_val >= max_val:
+            raise ValueError("min_val must be less than max_val")
+        # Floor rather than truncate: `int(-2.5)` is -2, which would place the
+        # first edge above the minimum and leave the smallest values unbinned.
+        start = int(np.floor(min_val))
+        end = int(np.floor(max_val)) + bin_size
+        bins = np.arange(start, end + 1, bin_size)
+        # Safety net: the last edge must lie strictly above max_val, otherwise
+        # the maximum would fall outside the final left-closed bin.
+        if bins[-1] <= max_val:
+            bins = np.append(bins, max_val + 1)
+        labels = [f"{int(low)}-{int(high - 1)}" for low, high in zip(bins[:-1], bins[1:])]
+        return bins, labels
+    def _transform_column_to_ranges(self, column: str, bin_size: int) -> pd.Series:
+        """
+        Transform one column of the DataFrame into range labels.
+        Values are first coerced with ``pd.to_numeric(errors="coerce")``; entries
+        that cannot be converted are missing (NaN) in the result.
+        Parameters
+        ----------
+        column : `str`
+            The name of the column to transform.
+        bin_size : `int`
+            The size of each bin.
+        Returns
+        -------
+        `pd.Series`
+            A Series named ``"<column>_range"``, indexed like the stored
+            DataFrame, holding the range label of every row.
+        Raises
+        ------
+        KeyError
+            If column is not found in the DataFrame.
+        ValueError
+            If bin_size is not positive or if column contains no numeric data.
+        """
+        if column not in self.df.columns:
+            raise KeyError(f"Column '{column}' not found in DataFrame")
+        if bin_size <= 0:
+            raise ValueError("bin_size must be positive")
+        numeric_series = pd.to_numeric(self.df[column], errors="coerce")
+        if numeric_series.isna().all():
+            raise ValueError(f"Column '{column}' contains no valid numeric data")
+        min_val = numeric_series.min()
+        max_val = numeric_series.max()
+        range_name = f"{column}_range"
+        if min_val == max_val:
+            # A constant column cannot be split into bins, so every numeric row
+            # gets the same degenerate label. The Series is built on the
+            # DataFrame's own index so it stays row-aligned with the other
+            # columns, and non-numeric entries stay missing exactly as they do
+            # in the `pd.cut` branch below.
+            constant = pd.Series(
+                f"{int(min_val)}-{int(max_val)}", index=self.df.index, name=range_name
+            )
+            return constant.where(numeric_series.notna().to_numpy())
+        bins, labels = self._create_bins_and_labels(min_val, max_val, bin_size)
+        binned = pd.cut(numeric_series, bins=bins, labels=labels, right=False, include_lowest=True)
+        return binned.rename(range_name)
+    def transform_dataframe(
+        self, columns_bin_sizes: Dict[str, int], drop_original: bool = False
+    ) -> pd.DataFrame:
+        """
+        Create a new DataFrame with range columns.
+        Parameters
+        ----------
+        columns_bin_sizes : `dict`
+            A dictionary where the keys are column names and the values are the bin sizes.
+        drop_original : `bool`, optional
+            If False (the default), the columns of the original DataFrame that
+            were *not* transformed are placed in front of the new range columns.
+            If True, only the range columns are returned. The source columns
+            that were transformed are never included in the result.
+        Returns
+        -------
+        `pd.DataFrame`
+            A DataFrame with one ``"<column>_range"`` column per requested
+            column. An empty DataFrame is returned when `columns_bin_sizes` is
+            empty.
+        Raises
+        ------
+        TypeError
+            If columns_bin_sizes is not a dictionary.
+        KeyError
+            If a requested column is not found in the DataFrame.
+        ValueError
+            If a bin size is not positive or a column contains no numeric data.
+        """
+        if not isinstance(columns_bin_sizes, dict):
+            raise TypeError("columns_bin_sizes must be a dictionary")
+        if not columns_bin_sizes:
+            return pd.DataFrame()
+        range_columns = {
+            f"{column}_range": self._transform_column_to_ranges(column, bin_size)
+            for column, bin_size in columns_bin_sizes.items()
+        }
+        result_df = pd.DataFrame(range_columns)
+        if not drop_original:
+            # Carry over only the columns that were not binned.
+            original_cols = [col for col in self.df.columns if col not in columns_bin_sizes]
+            if original_cols:
+                result_df = pd.concat([self.df[original_cols], result_df], axis=1)
+        return result_df
+    def get_range_info(self, column: str) -> Dict[str, Union[int, float, str]]:
+        """
+        Summarise the numeric span of a column.
+        Parameters
+        ----------
+        column : `str`
+            The name of the column to analyze.
+        Returns
+        -------
+        `dict`
+            Dictionary with the keys ``"min_value"``, ``"max_value"``,
+            ``"range"`` (``max_value - min_value``) and ``"column"``. The numeric
+            entries are NaN when the column holds no numeric data.
+        Raises
+        ------
+        KeyError
+            If column is not found in the DataFrame.
+        """
+        if column not in self.df.columns:
+            raise KeyError(f"Column '{column}' not found in DataFrame")
+        numeric_series = pd.to_numeric(self.df[column], errors="coerce")
+        min_val = numeric_series.min()
+        max_val = numeric_series.max()
+        return {
+            "min_value": min_val,
+            "max_value": max_val,
+            "range": max_val - min_val,
+            "column": column,
+        }
 def remove_collinearity(df: DataFrame, threshold: float = 0.9):
     """
     Removes highly collinear features from the DataFrame based on a correlation threshold.
@@ -56,8 +239,8 @@ def remove_collinearity(df: DataFrame, threshold: float = 0.9):
         The correlation threshold above which features will be removed. Default is `0.9`.
     Returns
-    ----------
-        DataFrame: A DataFrame with highly collinear features removed.
+    -------
+        DataFrame : A DataFrame with highly collinear features removed.
     """
     corr_matrix = df.corr().abs()
     upper_triangle = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
@@ -97,11 +280,11 @@ def train_and_insights(
         Fraction of data to use (default is 1.0).
     Keyword Arguments:
-    ----------
+    ------------------
     Additional keyword arguments passed to the `model.fit` function, such as validation split and callbacks.
     Returns
-    ----------
+    -------
     `tf.keras.Model`
         The trained model after fitting.
     """
@@ -207,7 +390,7 @@ def graph_metrics(adj_matrix: np.ndarray, eigenvector_threshold: float = 1e-6) -
         A threshold for the eigenvector centrality calculation, used to determine the cutoff for small eigenvalues. Default is `1e-6`.
     Returns
-    ----------
+    -------
     DataFrame : A DataFrame containing the following graph metrics as columns.
         - `Degree Centrality`: Degree centrality values for each node, indicating the number of direct connections each node has.
         - `Clustering Coefficient`: Clustering coefficient values for each node, representing the degree to which nodes cluster together.
@@ -218,7 +401,7 @@ def graph_metrics(adj_matrix: np.ndarray, eigenvector_threshold: float = 1e-6) -
         - `Assortativity`: The assortativity coefficient of the graph, measuring the tendency of nodes to connect to similar nodes.
     Notes
-    ----------
+    -----
     The returned DataFrame will have one row for each node and one column for each of the computed metrics.
     """
     adj_matrix = adj_matrix.astype(int)
@@ -251,3 +434,7 @@ def graph_metrics(adj_matrix: np.ndarray, eigenvector_threshold: float = 1e-6) -
     metrics_df["Assortativity"] = assortativity
     return metrics_df
+if __name__ == "__main__":
+    pass

likelihood/tools/numeric_tools.py CHANGED Viewed

@@ -154,7 +154,7 @@ def xicor(X: np.ndarray, Y: np.ndarray, ties: bool = True, random_seed: int = No
         The first variable to be correlated. Must have at least one dimension.
     Y : `np.ndarray`
         The second variable to be correlated. Must have at least one dimension.
-    ties : bool
+    ties : `bool`
         Whether to handle ties using randomization.
     random_seed : int, optional
         Seed for the random number generator for reproducibility.
@@ -356,9 +356,9 @@ def find_multiples(target: int) -> tuple[int, int] | None:
     Returns
     -------
     tuple[int, int] | None
-        If i and i+1 both divide target, returns (i, i+1).
-        Otherwise, returns (i, target // i).
-        Returns None if no factors are found.
+        If `i` and `i+1` both divide target, returns (i, i+1).
+        Otherwise, returns `(i, target // i)`.
+        Returns `None` if no factors are found.
     """
     for i in range(2, target + 1):
         if target % i == 0:

likelihood/tools/tools.py CHANGED Viewed

@@ -861,7 +861,7 @@ class DataFrameEncoder:
         """Encodes the `object` type columns of the dataframe
         Keyword Arguments:
-        ----------
+        ------------------
         - save_mode (`bool`): An optional boolean parameter. By default it is set to `True`
         - dictionary_name (`str`): An optional string parameter. By default it is set to `labelencoder_dictionary`
         - norm_method (`str`): An optional string parameter to perform normalization. By default it is set to `None`

{likelihood-2.0.0.dist-info → likelihood-2.0.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: likelihood
-Version: 2.0.0
+Version: 2.0.1
 Summary: A package that performs the maximum likelihood algorithm.
 Home-page: https://github.com/jzsmoreno/likelihood/
 Author: J. A. Moreno-Guerra

{likelihood-2.0.0.dist-info → likelihood-2.0.1.dist-info}/RECORD RENAMED Viewed

@@ -15,16 +15,16 @@ likelihood/models/deep/_predictor.py,sha256=XI4QfVM7PS_60zYtmi-V8UzNDrASFiDMVPmV
 likelihood/models/deep/autoencoders.py,sha256=muUBH9BclOK8ViI7PijyMOBBLVox6uwuIabyJvpU5qw,30729
 likelihood/models/deep/gan.py,sha256=rTnaLmIPjsKg6_0B8JZOVwPxdx59rHmqvzDitdJMCQ4,10924
 likelihood/models/deep/predictor.py,sha256=q5tPaAbF7s5XIcxVr6fyHTQdZa9tlixO9vb9a9Cw0wM,27831
-likelihood/models/deep/rl.py,sha256=9dhhnVTIETi9zvVeyOXYo1hl-LQJezmv0rgsUq11Qwc,11611
+likelihood/models/deep/rl.py,sha256=VVuwHwK24d2fe3uNHliE1QJsKGZAPhx_pdgj3jqN5rQ,11565
 likelihood/tools/__init__.py,sha256=N1IhMDzacsGQT2MIYBMBC0zTxes78vC_0gGrwkuPgmg,78
 likelihood/tools/cat_embed.py,sha256=SJ7o1vbrNYp21fLLcjRnWpUDcz1nVSe8TmMvsLIz5CI,7346
 likelihood/tools/figures.py,sha256=waF0NHIMrctCmaLhcuz5DMcXyRKynmn6aG0XITYCTLc,10940
 likelihood/tools/impute.py,sha256=n87Tv-xLUAdPl7BQLFcLWSsXBZbXksahyCayJWMydXc,9485
-likelihood/tools/models_tools.py,sha256=c3-vac-1MYSarYDtfR6XfVC7X_WY9auS7y2_3Z973IQ,8875
-likelihood/tools/numeric_tools.py,sha256=Hwf-lbqROqPPZ9N7eVzKIDyZxFGQdP53isWxPqpG0eo,12254
-likelihood/tools/tools.py,sha256=GKZsqjyO5tGXWGSfn3jlQBTjRlmBv2byfvpu-QclUx0,42188
-likelihood-2.0.0.dist-info/licenses/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
-likelihood-2.0.0.dist-info/METADATA,sha256=Ziysy1MQuW77OHHd1UzMtlfeUT9wsdgCl6rxW3uLBEE,2917
-likelihood-2.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-likelihood-2.0.0.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
-likelihood-2.0.0.dist-info/RECORD,,
+likelihood/tools/models_tools.py,sha256=-QAfvCy9mw-ZyeJHzJJ7O6eDfUXghtA7KfFtTc-Tp0A,14607
+likelihood/tools/numeric_tools.py,sha256=JeLECoVS3ayFH53kUYkAMs0fzALZV1M22-tBLM-Q34g,12264
+likelihood/tools/tools.py,sha256=5vPUHrm8D4ODsg-MP4uZ3NgXV9fNbs0Olx7RWtUdVDU,42196
+likelihood-2.0.1.dist-info/licenses/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
+likelihood-2.0.1.dist-info/METADATA,sha256=3mLJAcVO4jzu4IoCVVaSBPMxBWV-xnHs_f_DvvN9G0c,2917
+likelihood-2.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+likelihood-2.0.1.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
+likelihood-2.0.1.dist-info/RECORD,,

{likelihood-2.0.0.dist-info → likelihood-2.0.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{likelihood-2.0.0.dist-info → likelihood-2.0.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{likelihood-2.0.0.dist-info → likelihood-2.0.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

likelihood 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

likelihood-2.0.0-py3-none-any.whl → likelihood-2.0.1-py3-none-any.whl