likelihood 1.5.7__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,213 @@
1
+ import logging
2
+ import os
3
+ from typing import List
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
9
+ logging.getLogger("tensorflow").setLevel(logging.ERROR)
10
+ import tensorflow as tf
11
+ from pandas.core.frame import DataFrame
12
+ from sklearn.preprocessing import LabelEncoder
13
+
14
+ tf.get_logger().setLevel("ERROR")
15
+
16
+
17
class CategoricalEmbedder:
    """Encode categorical DataFrame columns as dense embedding vectors.

    Each fitted column gets a ``LabelEncoder`` mapping categories to integer
    ids, plus a randomly initialized embedding matrix of shape
    ``(vocab_size, embedding_dim)`` stored as a ``tf.Variable``.
    """

    def __init__(self, embedding_dim: int = 32):
        # Width of every per-category embedding vector.
        self.embedding_dim = embedding_dim
        # column name -> fitted sklearn LabelEncoder
        self.label_encoders = {}
        # column name -> tf.Variable of shape (vocab_size, embedding_dim)
        self.embeddings = {}

    def fit(self, df: DataFrame, categorical_cols: List[str]) -> None:
        """
        Fit the embeddings on the given data.

        Parameters
        ----------
        df : `DataFrame`
            Pandas DataFrame containing the tabular data.
        categorical_cols : `List[str]`
            List of column names representing categorical features.

        Returns
        -------
        `None`

        Raises
        ------
        ValueError
            If any of `categorical_cols` is missing from `df`.
        """
        df_processed = df.copy()

        # Validate every requested column up front so that no partial
        # encoder/embedding state is stored when one of them is missing.
        for col in categorical_cols:
            if col not in df_processed.columns:
                raise ValueError(f"Column {col} not found in DataFrame")

        for col in categorical_cols:
            # Impute missing values with the column mode before encoding;
            # skipped when the column is entirely NaN (empty mode).
            mode_val = df_processed[col].mode()
            if not mode_val.empty:
                df_processed[col] = df_processed[col].fillna(mode_val[0])

            le = LabelEncoder()
            df_processed[col] = le.fit_transform(df_processed[col])
            self.label_encoders[col] = le

            # One random embedding row per category, values drawn from [0, 1).
            vocab_size = len(le.classes_)
            embedding_matrix = np.random.rand(vocab_size, self.embedding_dim)
            self.embeddings[col] = tf.Variable(embedding_matrix, dtype=tf.float32)

    def transform(self, df: DataFrame, categorical_cols: List[str]):
        """
        Transform the data using the fitted embeddings.

        Parameters
        ----------
        df : `DataFrame`
            Pandas DataFrame containing the tabular data.
        categorical_cols : `List[str]`
            List of column names representing categorical features.

        Returns
        -------
        Transformed Pandas DataFrame with original columns except
        `categorical_cols` replaced by their embedding representations
        (one `{col}_embed_{i}` float column per embedding dimension).

        Raises
        ------
        ValueError
            If a column in `categorical_cols` was never fitted.
        """
        df_processed = df.copy()

        for col in categorical_cols:
            if col not in self.label_encoders:
                raise ValueError(
                    f"Column {col} has not been fitted. Please call fit() on this column first."
                )
            # NOTE(review): imputes with the mode of the *transform-time*
            # data, not the mode seen during fit(); an unseen mode value
            # would make LabelEncoder.transform raise — confirm intended.
            mode_val = df_processed[col].mode()
            if not mode_val.empty:
                df_processed[col] = df_processed[col].fillna(mode_val[0])
            le = self.label_encoders[col]
            df_processed[col] = le.transform(df_processed[col])

        for col in categorical_cols:
            indices_tensor = tf.constant(df_processed[col], dtype=tf.int32)
            embedding_layer = tf.nn.embedding_lookup(
                params=self.embeddings[col], ids=indices_tensor
            )
            if len(embedding_layer.shape) == 1:
                # Defensive: promote a scalar lookup to a single-row batch.
                embedding_layer = tf.expand_dims(embedding_layer, axis=0)

            # Materialize the tensor once; assigning plain ndarray slices
            # keeps the new columns as ordinary float dtypes instead of
            # converting an eager tensor per dimension.
            embedded = embedding_layer.numpy()
            for i in range(self.embedding_dim):
                df_processed[f"{col}_embed_{i}"] = embedded[:, i]
            df_processed.drop(columns=[col], inplace=True)

        return df_processed

    def inverse_transform(self, df: pd.DataFrame, categorical_cols: List[str]):
        """
        Inverse transform the data using the fitted embeddings.

        Parameters
        ----------
        df : `DataFrame`
            Pandas DataFrame containing the tabular data with embedded representations.
        categorical_cols : `List[str]`
            List of column names representing categorical features.

        Returns
        -------
        Transformed Pandas DataFrame with original columns replaced by their
        categorical labels.

        Raises
        ------
        ValueError
            If a column in `categorical_cols` was never fitted.
        """
        df_processed = df.copy()

        for col in categorical_cols:
            if col not in self.label_encoders:
                raise ValueError(
                    f"Column {col} has not been fitted. Please call fit() on this column first."
                )

            embedding_matrix = self.embeddings[col].numpy()
            label_encoder = self.label_encoders[col]

            embedded_columns = [f"{col}_embed_{i}" for i in range(self.embedding_dim)]
            embeddings = df_processed[embedded_columns].values

            # Nearest-neighbor decode: for each row pick the vocabulary entry
            # whose embedding is closest in Euclidean distance. Broadcasting
            # builds an (n_rows, vocab_size, dim) array, reduced over dim.
            distances = np.linalg.norm(embedding_matrix - embeddings[:, np.newaxis], axis=2)
            original_indices = np.argmin(distances, axis=1)
            original_labels = label_encoder.inverse_transform(original_indices)

            df_processed[col] = original_labels
            df_processed.drop(columns=embedded_columns, inplace=True)

        return df_processed

    def save_embeddings(self, path: str) -> None:
        """
        Save the embeddings to a directory.

        Parameters
        ----------
        path : `str`
            Path to the directory where embeddings will be saved.
        """
        os.makedirs(path, exist_ok=True)
        # One .npy file per fitted column.
        for col, embedding in self.embeddings.items():
            np.save(os.path.join(path, f"{col}_embedding.npy"), embedding.numpy())

    def load_embeddings(self, path: str) -> None:
        """
        Load the embeddings from a directory.

        Parameters
        ----------
        path : `str`
            Path to the directory where embeddings are saved.

        Raises
        ------
        FileNotFoundError
            If an embedding file for a fitted column is missing.
        """
        # NOTE: label_encoders must already be populated — only columns with
        # an encoder are loaded.
        for col in self.label_encoders.keys():
            embedding_path = os.path.join(path, f"{col}_embedding.npy")
            if not os.path.exists(embedding_path):
                raise FileNotFoundError(f"Embedding file {embedding_path} not found.")
            embedding_matrix = np.load(embedding_path)
            self.embeddings[col] = tf.Variable(embedding_matrix, dtype=tf.float32)
169
+
170
+
171
+ if __name__ == "__main__":
172
+ data = {
173
+ "color": ["red", "blue", None, "green", "blue"],
174
+ "size": ["S", "M", "XL", "XS", None],
175
+ "price": [10.99, 25.50, 30.00, 8.75, 12.25],
176
+ }
177
+ df = pd.DataFrame(data)
178
+
179
+ # Initialize the embedder
180
+ embedder = CategoricalEmbedder(embedding_dim=3)
181
+
182
+ # Fit the embeddings on the data
183
+ embedder.fit(df, categorical_cols=["color", "size"])
184
+
185
+ # Transform the data using the fitted embeddings
186
+ processed_df = embedder.transform(df, categorical_cols=["color", "size"])
187
+
188
+ print("Processed DataFrame:")
189
+ print(processed_df.head())
190
+
191
+ # Save the embeddings to disk
192
+ embedder.save_embeddings("./embeddings")
193
+
194
+ # Load the embeddings from disk
195
+ new_embedder = CategoricalEmbedder(embedding_dim=3)
196
+ new_embedder.label_encoders = (
197
+ embedder.label_encoders
198
+ ) # Assuming label encodings are consistent across runs
199
+ new_embedder.load_embeddings("./embeddings")
200
+
201
+ # Transform the data using the loaded embeddings
202
+ processed_df_loaded = new_embedder.transform(df, categorical_cols=["color", "size"])
203
+ print("\nProcessed DataFrame with Loaded Embeddings:")
204
+ print(processed_df_loaded.head())
205
+
206
+ # Inverse transform the data
207
+ df_loaded = new_embedder.inverse_transform(
208
+ processed_df_loaded, categorical_cols=["color", "size"]
209
+ )
210
+ print("\nOriginal DataFrame:")
211
+ print(df.head())
212
+ print("\nProcessed DataFrame with Inverse Transform:")
213
+ print(df_loaded.head())
likelihood/tools/tools.py CHANGED
@@ -8,10 +8,15 @@ import matplotlib.pyplot as plt
8
8
  import numpy as np
9
9
  import pandas as pd
10
10
  import yaml
11
+ from packaging import version
11
12
  from pandas.core.frame import DataFrame
12
13
 
13
- # Suppress RankWarning
14
- warnings.simplefilter("ignore", np.RankWarning)
14
+ if version.parse(np.__version__) < version.parse("2.0.0"):
15
+ filter = np.RankWarning
16
+ else:
17
+ filter = np.exceptions.RankWarning
18
+
19
+ warnings.simplefilter("ignore", filter)
15
20
 
16
21
  # -------------------------------------------------------------------------
17
22
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: likelihood
3
- Version: 1.5.7
3
+ Version: 2.0.0
4
4
  Summary: A package that performs the maximum likelihood algorithm.
5
5
  Home-page: https://github.com/jzsmoreno/likelihood/
6
6
  Author: J. A. Moreno-Guerra
@@ -20,9 +20,10 @@ Requires-Dist: pydocstyle>=6.3.0
20
20
  Requires-Dist: flake8>=6.0.0
21
21
  Requires-Dist: isort>=5.12.0
22
22
  Requires-Dist: mypy>=1.4.1
23
- Requires-Dist: numpy<2.0.0
23
+ Requires-Dist: numpy<3.0.0,>=1.26.4
24
24
  Requires-Dist: pydot==2.0.0
25
25
  Requires-Dist: matplotlib
26
+ Requires-Dist: packaging
26
27
  Requires-Dist: graphviz
27
28
  Requires-Dist: seaborn
28
29
  Requires-Dist: pyyaml
@@ -32,7 +33,7 @@ Requires-Dist: tqdm
32
33
  Provides-Extra: full
33
34
  Requires-Dist: networkx; extra == "full"
34
35
  Requires-Dist: pyvis; extra == "full"
35
- Requires-Dist: tensorflow==2.15.0; extra == "full"
36
+ Requires-Dist: tensorflow>=2.15.0; extra == "full"
36
37
  Requires-Dist: keras-tuner; extra == "full"
37
38
  Requires-Dist: scikit-learn; extra == "full"
38
39
  Dynamic: author
@@ -0,0 +1,30 @@
1
+ likelihood/__init__.py,sha256=5C0hapdsk85XZhN_rssRAEFpkRRuKNtj6cyRbqD2_gM,994
2
+ likelihood/main.py,sha256=fcCkGOOWKjfvw2tLVqjuKPV8t0rVCIT9FlbYcOv4EYo,7974
3
+ likelihood/graph/__init__.py,sha256=vUY4pKlnm3eSVTXd2d-5JDPawhqGNRIKRhaHIobsNws,188
4
+ likelihood/graph/_nn.py,sha256=Sh7dRz8QSI08Ydfw9e--uCxc4KMtHUsCz_-C-loXklQ,13883
5
+ likelihood/graph/graph.py,sha256=bLrNMvIh7GOTdPTwnNss8oPZ7cbSHQScAsH_ttmVUK0,3294
6
+ likelihood/graph/nn.py,sha256=uxCxGt1suKmThmEjFope2ew93-WlgvGhgr6RVCHwzhM,11420
7
+ likelihood/models/__init__.py,sha256=e6nB4w47w0Q9DrAFeP3OcUgcoHOtf7Il4mBhgf4AARg,52
8
+ likelihood/models/hmm.py,sha256=0s0gFySH1u4NjRaZDxiZ8oeTaFhFrw1x0GJxwy3dFrA,6253
9
+ likelihood/models/regression.py,sha256=9cakyGlJCEO6WfpoKLh3GxdXQeQp7cUvJIkQ5odT0TA,9404
10
+ likelihood/models/simulation.py,sha256=xsl4mJ2qFCuZR_B9LfQcLjV6OtONU1zyESX3CCUfOiw,8619
11
+ likelihood/models/utils.py,sha256=dvigPi_hxcs5ntfHr7Y1JvP5ULtMW3kkN0nJpS4orE8,1319
12
+ likelihood/models/deep/__init__.py,sha256=I55FciI0BfljYdhW2OGNqcpYV57FhPZETZX7Y1y9GVQ,303
13
+ likelihood/models/deep/_autoencoders.py,sha256=CeD79YzU7DdPd92wUNG_EtPVQOBgsgYoC4uS2JF3b6o,30939
14
+ likelihood/models/deep/_predictor.py,sha256=XI4QfVM7PS_60zYtmi-V8UzNDrASFiDMVPmV17BB8lM,27984
15
+ likelihood/models/deep/autoencoders.py,sha256=muUBH9BclOK8ViI7PijyMOBBLVox6uwuIabyJvpU5qw,30729
16
+ likelihood/models/deep/gan.py,sha256=rTnaLmIPjsKg6_0B8JZOVwPxdx59rHmqvzDitdJMCQ4,10924
17
+ likelihood/models/deep/predictor.py,sha256=q5tPaAbF7s5XIcxVr6fyHTQdZa9tlixO9vb9a9Cw0wM,27831
18
+ likelihood/models/deep/rl.py,sha256=9dhhnVTIETi9zvVeyOXYo1hl-LQJezmv0rgsUq11Qwc,11611
19
+ likelihood/tools/__init__.py,sha256=N1IhMDzacsGQT2MIYBMBC0zTxes78vC_0gGrwkuPgmg,78
20
+ likelihood/tools/cat_embed.py,sha256=SJ7o1vbrNYp21fLLcjRnWpUDcz1nVSe8TmMvsLIz5CI,7346
21
+ likelihood/tools/figures.py,sha256=waF0NHIMrctCmaLhcuz5DMcXyRKynmn6aG0XITYCTLc,10940
22
+ likelihood/tools/impute.py,sha256=n87Tv-xLUAdPl7BQLFcLWSsXBZbXksahyCayJWMydXc,9485
23
+ likelihood/tools/models_tools.py,sha256=c3-vac-1MYSarYDtfR6XfVC7X_WY9auS7y2_3Z973IQ,8875
24
+ likelihood/tools/numeric_tools.py,sha256=Hwf-lbqROqPPZ9N7eVzKIDyZxFGQdP53isWxPqpG0eo,12254
25
+ likelihood/tools/tools.py,sha256=GKZsqjyO5tGXWGSfn3jlQBTjRlmBv2byfvpu-QclUx0,42188
26
+ likelihood-2.0.0.dist-info/licenses/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
27
+ likelihood-2.0.0.dist-info/METADATA,sha256=Ziysy1MQuW77OHHd1UzMtlfeUT9wsdgCl6rxW3uLBEE,2917
28
+ likelihood-2.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
+ likelihood-2.0.0.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
30
+ likelihood-2.0.0.dist-info/RECORD,,
@@ -1,25 +0,0 @@
1
- likelihood/__init__.py,sha256=5C0hapdsk85XZhN_rssRAEFpkRRuKNtj6cyRbqD2_gM,994
2
- likelihood/main.py,sha256=fcCkGOOWKjfvw2tLVqjuKPV8t0rVCIT9FlbYcOv4EYo,7974
3
- likelihood/graph/__init__.py,sha256=6TuFDfmXTwpLyHl7_KqBfdzW6zqHjGzIFvymjFPlvjI,21
4
- likelihood/graph/graph.py,sha256=bLrNMvIh7GOTdPTwnNss8oPZ7cbSHQScAsH_ttmVUK0,3294
5
- likelihood/graph/nn.py,sha256=uxCxGt1suKmThmEjFope2ew93-WlgvGhgr6RVCHwzhM,11420
6
- likelihood/models/__init__.py,sha256=e6nB4w47w0Q9DrAFeP3OcUgcoHOtf7Il4mBhgf4AARg,52
7
- likelihood/models/hmm.py,sha256=0s0gFySH1u4NjRaZDxiZ8oeTaFhFrw1x0GJxwy3dFrA,6253
8
- likelihood/models/regression.py,sha256=9cakyGlJCEO6WfpoKLh3GxdXQeQp7cUvJIkQ5odT0TA,9404
9
- likelihood/models/simulation.py,sha256=6OD2IXAnbctxtOzUJ2b9vKW7_tdGs4dQYmQQShqsioA,8443
10
- likelihood/models/utils.py,sha256=dvigPi_hxcs5ntfHr7Y1JvP5ULtMW3kkN0nJpS4orE8,1319
11
- likelihood/models/deep/__init__.py,sha256=UV_VYhySvrNnB4a0VXYM4wK3KKF7ytjLFFfwvnaZWaA,82
12
- likelihood/models/deep/autoencoders.py,sha256=9-ZOKbS02tojCufg_Fbd5_Z48pSFSqZnfZZJVohNqdk,29985
13
- likelihood/models/deep/gan.py,sha256=aoSaNO5LvCU62cjxA0AxvnQvE7NSFtrp1Ta4EDJchpo,10874
14
- likelihood/models/deep/predictor.py,sha256=Z6GVm9ciz90cMcp4Q6Lvm-_8_9ZOxX1kBquReW2aGqM,27688
15
- likelihood/tools/__init__.py,sha256=N1IhMDzacsGQT2MIYBMBC0zTxes78vC_0gGrwkuPgmg,78
16
- likelihood/tools/figures.py,sha256=waF0NHIMrctCmaLhcuz5DMcXyRKynmn6aG0XITYCTLc,10940
17
- likelihood/tools/impute.py,sha256=n87Tv-xLUAdPl7BQLFcLWSsXBZbXksahyCayJWMydXc,9485
18
- likelihood/tools/models_tools.py,sha256=c3-vac-1MYSarYDtfR6XfVC7X_WY9auS7y2_3Z973IQ,8875
19
- likelihood/tools/numeric_tools.py,sha256=Hwf-lbqROqPPZ9N7eVzKIDyZxFGQdP53isWxPqpG0eo,12254
20
- likelihood/tools/tools.py,sha256=lk9BIskjUKYQ1XVwARm9jAjHuLQ4UO68aZY8oxkzk5c,42056
21
- likelihood-1.5.7.dist-info/licenses/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
22
- likelihood-1.5.7.dist-info/METADATA,sha256=V8yQ5NJPbMyxOB7sICsp5QCkZ8MZhxkfS-4WCWMrFG0,2883
23
- likelihood-1.5.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
- likelihood-1.5.7.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
25
- likelihood-1.5.7.dist-info/RECORD,,