PyPI - autofuzzts - Versions diffs - 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl - Mend

autofuzzts 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

autofuzzts/config.py +17 -17
autofuzzts/data/data_loader.py +7 -7
autofuzzts/data_validation/validate.py +41 -41
autofuzzts/models/fuzzy_classifier.py +82 -82
autofuzzts/models/mlp_nas.py +90 -90
autofuzzts/partition/{fuzzy_clust_fun.py → fuzzy_part_fun.py} +107 -107
autofuzzts/partition/partition.py +109 -109
autofuzzts/partition/visualize_partition.py +32 -32
autofuzzts/pipeline.py +469 -469
autofuzzts/preprocess/prep_for_model.py +70 -70
autofuzzts/preprocess/preprocess.py +62 -62
{autofuzzts-0.1.2.dist-info → autofuzzts-0.1.3.dist-info}/METADATA +161 -146
autofuzzts-0.1.3.dist-info/RECORD +23 -0
{autofuzzts-0.1.2.dist-info → autofuzzts-0.1.3.dist-info}/WHEEL +1 -1
{autofuzzts-0.1.2.dist-info → autofuzzts-0.1.3.dist-info}/licenses/LICENSE +21 -21
autofuzzts/partition/fuzzy_clust_fun_orig.py +0 -129
autofuzzts/utils.py +0 -1
autofuzzts-0.1.2.dist-info/RECORD +0 -25
{autofuzzts-0.1.2.dist-info → autofuzzts-0.1.3.dist-info}/top_level.txt +0 -0

autofuzzts/partition/{fuzzy_clust_fun.py → fuzzy_part_fun.py} RENAMED Viewed

@@ -1,107 +1,107 @@
-## Functions for fuzzy clustering
-import numpy as np
-import pandas as pd
-def fuzzy_partition_cosine(X: pd.Series, n: int):
-    """
-    Midsteps of the calculation:
-    D - distance vector (D) represents the relative position of each data point within the partition
-    h - height, spread of the fuzzy sets
-    """
-    n_rows = len(X)
-    x_min = X.min()
-    x_max = X.max()
-    D = np.linspace(x_min, x_max, n)
-    h = (D[-1] - D[0]) / (n - 1)
-    A = np.zeros((n_rows, n))
-    for k in range(n_rows):
-        # First column
-        if (D[0] <= X[k]) and (X[k] <= D[1]):
-            A[k, 0] = 0.5 * (np.cos(np.pi * (X[k] - D[0]) / h) + 1)
-        # Last column
-        elif (D[n - 2] <= X[k]) and (X[k] <= D[n - 1]):
-            A[k, n - 1] = 0.5 * (np.cos(np.pi * (X[k] - D[n - 1]) / h) + 1)
-        # All other columns
-        for j in range(1, n - 1):
-            if (D[j - 1] <= X[k]) and (X[k] <= D[j + 1]):
-                A[k, j] = 0.5 * (np.cos(np.pi * (X[k] - D[j]) / h) + 1)
-    return D, A
-def fuzzy_partition_triangle(X: pd.Series, n: int):
-    """
-    Midsteps of the calculation:
-    D - distance vector (D) represents the relative position of each data point within the partition
-    h - height, spread of the fuzzy sets
-    """
-    n_rows = len(X)
-    x_min = X.min()
-    x_max = X.max()
-    D = np.linspace(x_min, x_max, n)
-    h = (D[-1] - D[0]) / (n - 1)
-    A = np.zeros((n_rows, n))
-    for k in range(n_rows):
-        # First column
-        if (D[0] <= X[k]) and (X[k] <= D[1]):
-            A[k, 0] = (D[1] - X[k]) / h
-        # Last column
-        elif (D[n - 2] <= X[k]) and (X[k] <= D[n - 1]):
-            A[k, n - 1] = (X[k] - D[n - 2]) / h
-        # All other columns
-        for j in range(1, n - 1):
-            if (D[j - 1] <= X[k]) and (X[k] <= D[j]):
-                A[k, j] = (X[k] - D[j - 1]) / h
-            if (D[j] <= X[k]) and (X[k] <= D[j + 1]):
-                A[k, j] = (D[j + 1] - X[k]) / h
-    return D, A
-def fuzzy_partition_gauss(X: pd.Series, n: int, sigma: float = 1):
-    """
-    Midsteps of the calculation:
-    D - distance vector (D) represents the relative position of each data point within the partition
-    h - height, spread of the fuzzy sets
-    sigma - standard deviation of the Gaussian function
-    """
-    n_rows = len(X)
-    x_min = X.min()
-    x_max = X.max()
-    D = np.linspace(x_min, x_max, n)
-    A = np.zeros((n_rows, n))
-    for k in range(n_rows):
-        # First column
-        if (D[0] <= X[k]) and (X[k] <= D[1]):
-            A[k, 0] = np.exp(-((X[k] - D[0]) ** 2) / (2 * sigma**2))
-        # Last column
-        elif (D[n - 2] <= X[k]) and (X[k] <= D[n - 1]):
-            A[k, n - 1] = np.exp(-((X[k] - D[n - 1]) ** 2) / (2 * sigma**2))
-        # All other columns
-        for j in range(1, n - 1):
-            if (D[j - 1] <= X[k]) and (X[k] <= D[j + 1]):
-                A[k, j] = np.exp(-((X[k] - D[j]) ** 2) / (2 * sigma**2))
-    return D, A
+## Functions for fuzzy partitioning
+import numpy as np
+import pandas as pd
+def fuzzy_partition_cosine(X: pd.Series, n: int):
+    """
+    Build n raised-cosine fuzzy sets over the observed range of X.
+    Midsteps of the calculation:
+    D - n evenly spaced points from X.min() to X.max(); membership in set j peaks at D[j]
+    h - spacing between neighbouring points of D
+    Parameters:
+        X: values to partition, read as X[k] for k in range(len(X)).
+        n: number of fuzzy sets (columns of A).
+    Returns:
+        (D, A): D as above, and A of shape (len(X), n) holding the membership of
+        X[k] in set j at A[k, j]; entries no branch assigns stay 0.
+    """
+    n_rows = len(X)
+    x_min = X.min()
+    x_max = X.max()
+    D = np.linspace(x_min, x_max, n)
+    # NOTE(review): n - 1 is zero for n == 1 and D[1] below would not exist; presumably callers always pass n >= 2 - confirm
+    h = (D[-1] - D[0]) / (n - 1)
+    A = np.zeros((n_rows, n))
+    for k in range(n_rows):
+        # NOTE(review): on a pd.Series, X[k] is a label lookup, so this presumably relies on a default 0..len-1 index (or an ndarray) - confirm
+        # First column: only the falling half of the cosine, from 1 at D[0] to 0 at D[1]
+        if (D[0] <= X[k]) and (X[k] <= D[1]):
+            A[k, 0] = 0.5 * (np.cos(np.pi * (X[k] - D[0]) / h) + 1)
+        # Last column: only the rising half; being an elif, it is skipped whenever the first-column test matched
+        elif (D[n - 2] <= X[k]) and (X[k] <= D[n - 1]):
+            A[k, n - 1] = 0.5 * (np.cos(np.pi * (X[k] - D[n - 1]) / h) + 1)
+        # All other columns: full cosine bump centred on D[j], evaluated between the two neighbouring centres
+        for j in range(1, n - 1):
+            if (D[j - 1] <= X[k]) and (X[k] <= D[j + 1]):
+                A[k, j] = 0.5 * (np.cos(np.pi * (X[k] - D[j]) / h) + 1)
+    return D, A
+def fuzzy_partition_triangle(X: pd.Series, n: int):
+    """
+    Build n triangular fuzzy sets over the observed range of X.
+    Midsteps of the calculation:
+    D - n evenly spaced points from X.min() to X.max(); set j is centred on D[j]
+    h - spacing between neighbouring points of D (the length of each triangle edge along X)
+    Parameters:
+        X: values to partition, read as X[k] for k in range(len(X)).
+        n: number of fuzzy sets (columns of A).
+    Returns:
+        (D, A): D as above, and A of shape (len(X), n) holding the membership of
+        X[k] in set j at A[k, j]; entries no branch assigns stay 0.
+    """
+    n_rows = len(X)
+    x_min = X.min()
+    x_max = X.max()
+    D = np.linspace(x_min, x_max, n)
+    # NOTE(review): n - 1 is zero for n == 1 and D[1] below would not exist; presumably callers always pass n >= 2 - confirm
+    h = (D[-1] - D[0]) / (n - 1)
+    A = np.zeros((n_rows, n))
+    for k in range(n_rows):
+        # NOTE(review): on a pd.Series, X[k] is a label lookup, so this presumably relies on a default 0..len-1 index (or an ndarray) - confirm
+        # First column: falling edge only, 1 at D[0] down to 0 at D[1]
+        if (D[0] <= X[k]) and (X[k] <= D[1]):
+            A[k, 0] = (D[1] - X[k]) / h
+        # Last column: rising edge only, 0 at D[n - 2] up to 1 at D[n - 1]; being an elif, it is skipped whenever the first-column test matched
+        elif (D[n - 2] <= X[k]) and (X[k] <= D[n - 1]):
+            A[k, n - 1] = (X[k] - D[n - 2]) / h
+        # All other columns
+        for j in range(1, n - 1):
+            # Rising edge between D[j - 1] and D[j]
+            if (D[j - 1] <= X[k]) and (X[k] <= D[j]):
+                A[k, j] = (X[k] - D[j - 1]) / h
+            # Falling edge between D[j] and D[j + 1]; when X[k] == D[j] both tests match and this assignment is the one that remains
+            if (D[j] <= X[k]) and (X[k] <= D[j + 1]):
+                A[k, j] = (D[j + 1] - X[k]) / h
+    return D, A
+def fuzzy_partition_gauss(X: pd.Series, n: int, sigma: float = 1):
+    """
+    Build n Gaussian fuzzy sets over the observed range of X.
+    Midsteps of the calculation:
+    D - n evenly spaced points from X.min() to X.max(); set j is centred on D[j]
+    sigma - standard deviation of the Gaussian function (no spacing h is computed here)
+    Parameters:
+        X: values to partition, read as X[k] for k in range(len(X)).
+        n: number of fuzzy sets (columns of A).
+        sigma: width of every Gaussian, in the same units as X.
+    Returns:
+        (D, A): D as above, and A of shape (len(X), n) holding the membership of
+        X[k] in set j at A[k, j]; entries no branch assigns stay 0.
+    """
+    n_rows = len(X)
+    x_min = X.min()
+    x_max = X.max()
+    # NOTE(review): D[1] below does not exist for n == 1; presumably callers always pass n >= 2 - confirm
+    D = np.linspace(x_min, x_max, n)
+    A = np.zeros((n_rows, n))
+    for k in range(n_rows):
+        # NOTE(review): on a pd.Series, X[k] is a label lookup, so this presumably relies on a default 0..len-1 index (or an ndarray) - confirm
+        # First column: Gaussian centred on D[0], assigned only for values between D[0] and D[1]
+        if (D[0] <= X[k]) and (X[k] <= D[1]):
+            A[k, 0] = np.exp(-((X[k] - D[0]) ** 2) / (2 * sigma**2))
+        # Last column: Gaussian centred on D[n - 1]; being an elif, it is skipped whenever the first-column test matched
+        elif (D[n - 2] <= X[k]) and (X[k] <= D[n - 1]):
+            A[k, n - 1] = np.exp(-((X[k] - D[n - 1]) ** 2) / (2 * sigma**2))
+        # All other columns: Gaussian centred on D[j], assigned only between the two neighbouring centres (stays 0 outside, whatever sigma is)
+        for j in range(1, n - 1):
+            if (D[j - 1] <= X[k]) and (X[k] <= D[j + 1]):
+                A[k, j] = np.exp(-((X[k] - D[j]) ** 2) / (2 * sigma**2))
+    return D, A

autofuzzts/partition/partition.py CHANGED Viewed

@@ -1,110 +1,110 @@
-import numpy as np
-import pandas as pd
-from typing import Union, Literal
-import warnings
-from sklearn.preprocessing import MinMaxScaler
-from autofuzzts.partition.fuzzy_clust_fun import (
-    fuzzy_partition_cosine,
-    fuzzy_partition_triangle,
-    fuzzy_partition_gauss,
-)
-class FuzzyPartition:
-    def __init__(self, fuzzy_function: Literal["cosine", "triangle", "gauss"], n_clusters: int, sigma: float, scaler: MinMaxScaler, verbosity: bool = False):
-        self.fuzzy_function = self._get_fuzzy_partition_func(fuzzy_function)
-        self.n_clusters = n_clusters
-        self.sigma = sigma
-        self.verbosity = verbosity
-        self.scaler = scaler
-        if scaler is None:  # Check if scaler is None
-            warnings.warn("Scaler must be provided for inverse transformation.")
-    def _get_fuzzy_partition_func(self, fuzzy_part_func: Union[str, None]):
-        if fuzzy_part_func == "cosine":
-            return fuzzy_partition_cosine  # Replace with actual function
-        elif fuzzy_part_func == "triangle":
-            return fuzzy_partition_triangle  # Replace with actual function
-        elif fuzzy_part_func == "gauss":
-            return fuzzy_partition_gauss  # Replace with actual function
-        else:
-            return fuzzy_partition_cosine  # Default function
-    def fuzzy_partition(self, X: np.ndarray) -> pd.DataFrame:
-        """
-        Perform fuzzy partitioning on the target variable X.
-        Parameters:
-            X (np.ndarray): Input data to be partitioned.
-        Returns:
-            pd.DataFrame: DataFrame containing partition results.
-        """
-        # Perform fuzzy partitioning using the selected function
-        if self.fuzzy_function.__name__ == "fuzzy_partition_gauss":
-            D, A = self.fuzzy_function(X=X, n=self.n_clusters, sigma=self.sigma)
-        else:
-            D, A = self.fuzzy_function(X=X, n=self.n_clusters)
-        center_points = list(D.flatten())
-        center_points = [round(i, 2) for i in center_points]
-        center_points = np.array(center_points)
-        if self.verbosity:
-            print("Cluster center points:", center_points)
-        # Unscaled center points
-        center_points_unscaled = self.scaler.inverse_transform(
-            center_points.reshape(-1, 1)
-        )
-        self.center_points_unscaled = center_points_unscaled.flatten()
-        if self.verbosity:
-            print("Cluster center points unscaled:", self.center_points_unscaled.flatten())
-        # Create a DataFrame for membership values
-        A_df = pd.DataFrame(A)
-        A_df.columns = ["set_" + str(i) for i in range(A_df.shape[1])]
-        # Prepare the fuzzy partition DataFrame
-        fp_df = A_df.copy()
-        fp_df.insert(0, "X_value", X)
-        fp_df["membership_value"] = fp_df.iloc[:, 1:].max(axis=1)
-        fp_df["cluster"] = fp_df.iloc[:, 1:].idxmax(axis=1)
-        # Initialize 'left' and 'right' columns
-        fp_df["left"] = 0
-        fp_df["right"] = 0
-        # Define sets for left and right logic
-        set_min = "set_0"
-        set_max = "set_" + str(len(center_points) - 1)
-        # Set left and right for min and max sets
-        fp_df.loc[fp_df["cluster"] == set_min, "right"] = 1
-        fp_df.loc[fp_df["cluster"] == set_max, "left"] = 1
-        fp_df["center_point"] = ""
-        fp_df.loc[fp_df["cluster"] == set_min, "center_point"] = 0
-        fp_df.loc[fp_df["cluster"] == set_max, "center_point"] = 1
-        # Logic for intermediate clusters
-        for i in range(1, len(center_points) - 1):
-            set_i = "set_" + str(i)
-            fp_df.loc[fp_df["cluster"] == set_i, "center_point"] = center_points[i]
-            fp_df.loc[
-                (fp_df["cluster"] == set_i) & (fp_df["X_value"] >= center_points[i]),
-                "right",
-            ] = 1
-            fp_df.loc[
-                (fp_df["cluster"] == set_i) & (fp_df["X_value"] < center_points[i]),
-                "left",
-            ] = 1
-        # Ensure membership values are non-negative
-        fp_df.loc[fp_df["membership_value"] < 0, "membership_value"] = 0
-        # Keep only relevant columns
-        fp_df = fp_df.loc[:, ["X_value", "membership_value", "cluster", "left"]]
+import numpy as np
+import pandas as pd
+from typing import Union, Literal
+import warnings
+from sklearn.preprocessing import MinMaxScaler
+from autofuzzts.partition.fuzzy_part_fun import (
+    fuzzy_partition_cosine,
+    fuzzy_partition_triangle,
+    fuzzy_partition_gauss,
+)
+class FuzzyPartition:
+    """
+    Apply one of the fuzzy_partition_* functions and summarise, per observation,
+    the strongest fuzzy set, its membership value and a left/right side flag.
+    """
+    def __init__(self, fuzzy_function: Literal["cosine", "triangle", "gauss"], n_fuzzy_sets: int, sigma: float, scaler: MinMaxScaler, verbosity: bool = False):
+        """
+        Parameters:
+            fuzzy_function: name of the membership shape; unrecognised values fall back to cosine.
+            n_fuzzy_sets: number of fuzzy sets passed to the partition function as n.
+            sigma: Gaussian width; only forwarded when the gauss partition is selected.
+            scaler: used in fuzzy_partition() to inverse-transform the centre points.
+            verbosity: when true, centre points are printed during fuzzy_partition().
+        """
+        self.fuzzy_function = self._get_fuzzy_partition_func(fuzzy_function)
+        self.n_fuzzy_sets = n_fuzzy_sets
+        self.sigma = sigma
+        self.verbosity = verbosity
+        self.scaler = scaler
+        if scaler is None:  # only warns; fuzzy_partition() still calls self.scaler.inverse_transform unconditionally
+            warnings.warn("Scaler must be provided for inverse transformation.")
+    def _get_fuzzy_partition_func(self, fuzzy_part_func: Union[str, None]):
+        """Map a membership-shape name to its partition function; anything else (including None) yields the cosine partition."""
+        if fuzzy_part_func == "cosine":
+            return fuzzy_partition_cosine
+        elif fuzzy_part_func == "triangle":
+            return fuzzy_partition_triangle
+        elif fuzzy_part_func == "gauss":
+            return fuzzy_partition_gauss
+        else:
+            return fuzzy_partition_cosine  # Default function
+    def fuzzy_partition(self, X: np.ndarray) -> pd.DataFrame:
+        """
+        Perform fuzzy partitioning on the target variable X.
+        Parameters:
+            X (np.ndarray): Input data to be partitioned.
+        Returns:
+            A 3-tuple (fp_df, center_points, center_points_unscaled), despite the
+            pd.DataFrame annotation:
+            fp_df - DataFrame with columns X_value, membership_value, fuzzy_set, left.
+            center_points - centre points from the partition function, rounded to 2 decimals.
+            center_points_unscaled - the rounded centre points after scaler.inverse_transform, flattened.
+        Side effects:
+            Stores the unscaled centre points on self.center_points_unscaled.
+        """
+        # Perform fuzzy partitioning using the selected function
+        # Only the gauss variant accepts sigma, so it is dispatched separately by function name
+        if self.fuzzy_function.__name__ == "fuzzy_partition_gauss":
+            D, A = self.fuzzy_function(X=X, n=self.n_fuzzy_sets, sigma=self.sigma)
+        else:
+            D, A = self.fuzzy_function(X=X, n=self.n_fuzzy_sets)
+        # Everything below (including the left/right comparisons) uses the centre points rounded to 2 decimals
+        center_points = list(D.flatten())
+        center_points = [round(i, 2) for i in center_points]
+        center_points = np.array(center_points)
+        if self.verbosity:
+            print("Fuzzy set center points:", center_points)
+        # Unscaled center points
+        center_points_unscaled = self.scaler.inverse_transform(
+            center_points.reshape(-1, 1)
+        )
+        self.center_points_unscaled = center_points_unscaled.flatten()
+        if self.verbosity:
+            print("fuzzy_set center points unscaled:", self.center_points_unscaled.flatten())
+        # Create a DataFrame for membership values
+        A_df = pd.DataFrame(A)
+        A_df.columns = ["set_" + str(i) for i in range(A_df.shape[1])]
+        # Prepare the fuzzy partition DataFrame
+        fp_df = A_df.copy()
+        fp_df.insert(0, "X_value", X)
+        fp_df["membership_value"] = fp_df.iloc[:, 1:].max(axis=1)
+        # This slice also contains the membership_value column just added; idxmax returns the first column reaching the maximum, so a set_* column is picked
+        fp_df["fuzzy_set"] = fp_df.iloc[:, 1:].idxmax(axis=1)
+        # Initialize 'left' and 'right' columns
+        fp_df["left"] = 0
+        fp_df["right"] = 0
+        # Define sets for left and right logic
+        set_min = "set_0"
+        set_max = "set_" + str(len(center_points) - 1)
+        # Set left and right for min and max sets
+        fp_df.loc[fp_df["fuzzy_set"] == set_min, "right"] = 1
+        fp_df.loc[fp_df["fuzzy_set"] == set_max, "left"] = 1
+        fp_df["center_point"] = ""
+        # NOTE(review): end-set centres are hard-coded to 0 and 1 rather than read from center_points; presumably X is expected to be min-max scaled to [0, 1] - confirm
+        fp_df.loc[fp_df["fuzzy_set"] == set_min, "center_point"] = 0
+        fp_df.loc[fp_df["fuzzy_set"] == set_max, "center_point"] = 1
+        # Logic for intermediate fuzzy_sets
+        for i in range(1, len(center_points) - 1):
+            set_i = "set_" + str(i)
+            fp_df.loc[fp_df["fuzzy_set"] == set_i, "center_point"] = center_points[i]
+            # Values at or above the (rounded) centre are flagged right, values below it left
+            fp_df.loc[
+                (fp_df["fuzzy_set"] == set_i) & (fp_df["X_value"] >= center_points[i]),
+                "right",
+            ] = 1
+            fp_df.loc[
+                (fp_df["fuzzy_set"] == set_i) & (fp_df["X_value"] < center_points[i]),
+                "left",
+            ] = 1
+        # Ensure membership values are non-negative
+        fp_df.loc[fp_df["membership_value"] < 0, "membership_value"] = 0
+        # Keep only relevant columns
+        # The 'right' and 'center_point' columns built above are dropped here, as are the per-set membership columns
+        fp_df = fp_df.loc[:, ["X_value", "membership_value", "fuzzy_set", "left"]]
         return fp_df, center_points, center_points_unscaled.flatten()

autofuzzts/partition/visualize_partition.py CHANGED Viewed

@@ -1,32 +1,32 @@
-import numpy as np
-import matplotlib.pyplot as plt
-def visualize_partition(fp_df, center_points):
-    plt.figure(figsize=(6, 3))
-    # Scatter plot with size based on membership value
-    plt.scatter(
-        fp_df["X_value"],
-        fp_df["membership_value"],
-        c=fp_df["cluster"].astype("category").cat.codes,
-        cmap="viridis",
-        s=50,
-    )
-    plt.xlabel("X")
-    plt.ylabel("Membership Value")
-    plt.title("Fuzzy Partition")
-    # Plot center points with horizontal line at y=0.5
-    plt.plot(center_points, np.ones_like(center_points) * 0.5, "x", markersize=10)
-    # Add labels for center points with slight vertical offset
-    for i, txt in enumerate(center_points):
-        plt.annotate(
-            txt,
-            (center_points[i], 0.5 + 0.015),
-            horizontalalignment="center",
-            fontsize=8,
-        )
-    plt.show()
+import numpy as np
+import matplotlib.pyplot as plt
+def visualize_partition(fp_df, center_points):
+    """
+    Show a scatter plot of membership value against X, coloured by fuzzy set,
+    with the centre points marked and labelled at y = 0.5.
+    Parameters:
+        fp_df: table providing the X_value, membership_value and fuzzy_set columns.
+        center_points: sequence of centre-point values to mark on the X axis scale.
+    Returns:
+        None; the figure is displayed with plt.show().
+    """
+    plt.figure(figsize=(6, 3))
+    # Scatter plot with a constant marker size; colour encodes the fuzzy set via its category code
+    plt.scatter(
+        fp_df["X_value"],
+        fp_df["membership_value"],
+        c=fp_df["fuzzy_set"].astype("category").cat.codes,
+        cmap="viridis",
+        s=50,
+    )
+    plt.xlabel("X")
+    plt.ylabel("Membership Value")
+    plt.title("Fuzzy Partition")
+    # Mark each center point with an "x" marker at y=0.5 (markers only, no connecting line)
+    plt.plot(center_points, np.ones_like(center_points) * 0.5, "x", markersize=10)
+    # Add labels for center points with slight vertical offset
+    for i, txt in enumerate(center_points):
+        plt.annotate(
+            txt,
+            (center_points[i], 0.5 + 0.015),
+            horizontalalignment="center",
+            fontsize=8,
+        )
+    plt.show()

autofuzzts 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

autofuzzts 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl