autofuzzts 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. autofuzzts-0.1.0/LICENSE +21 -0
  2. autofuzzts-0.1.0/PKG-INFO +41 -0
  3. autofuzzts-0.1.0/README.md +9 -0
  4. autofuzzts-0.1.0/autofuzzts/__init__.py +1 -0
  5. autofuzzts-0.1.0/autofuzzts/config.py +18 -0
  6. autofuzzts-0.1.0/autofuzzts/data/__init__.py +1 -0
  7. autofuzzts-0.1.0/autofuzzts/data/data_loader.py +7 -0
  8. autofuzzts-0.1.0/autofuzzts/data_validation/__init__.py +1 -0
  9. autofuzzts-0.1.0/autofuzzts/data_validation/validate.py +41 -0
  10. autofuzzts-0.1.0/autofuzzts/evaluation/__init__.py +1 -0
  11. autofuzzts-0.1.0/autofuzzts/models/__init__.py +1 -0
  12. autofuzzts-0.1.0/autofuzzts/models/fuzzy_classifier.py +83 -0
  13. autofuzzts-0.1.0/autofuzzts/models/mlp_nas.py +90 -0
  14. autofuzzts-0.1.0/autofuzzts/partition/__init__.py +1 -0
  15. autofuzzts-0.1.0/autofuzzts/partition/fuzzy_clust_fun.py +107 -0
  16. autofuzzts-0.1.0/autofuzzts/partition/fuzzy_clust_fun_orig.py +129 -0
  17. autofuzzts-0.1.0/autofuzzts/partition/partition.py +110 -0
  18. autofuzzts-0.1.0/autofuzzts/partition/visualize_partition.py +32 -0
  19. autofuzzts-0.1.0/autofuzzts/pipeline.py +383 -0
  20. autofuzzts-0.1.0/autofuzzts/preprocess/__init__.py +1 -0
  21. autofuzzts-0.1.0/autofuzzts/preprocess/prep_for_model.py +70 -0
  22. autofuzzts-0.1.0/autofuzzts/preprocess/preprocess.py +63 -0
  23. autofuzzts-0.1.0/autofuzzts/utils.py +1 -0
  24. autofuzzts-0.1.0/autofuzzts.egg-info/PKG-INFO +41 -0
  25. autofuzzts-0.1.0/autofuzzts.egg-info/SOURCES.txt +31 -0
  26. autofuzzts-0.1.0/autofuzzts.egg-info/dependency_links.txt +1 -0
  27. autofuzzts-0.1.0/autofuzzts.egg-info/requires.txt +14 -0
  28. autofuzzts-0.1.0/autofuzzts.egg-info/top_level.txt +2 -0
  29. autofuzzts-0.1.0/pyproject.toml +36 -0
  30. autofuzzts-0.1.0/setup.cfg +4 -0
  31. autofuzzts-0.1.0/setup.py +36 -0
  32. autofuzzts-0.1.0/tests/test_models.py +1 -0
  33. autofuzzts-0.1.0/tests/test_pipeline.py +1 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Jan Timko
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,41 @@
1
+ Metadata-Version: 2.4
2
+ Name: autofuzzts
3
+ Version: 0.1.0
4
+ Summary: Automated fuzzy time series forecasting library
5
+ Home-page: https://github.com/jtimko16/AutoFuzzTS
6
+ Author: Jan Timko
7
+ Author-email: Jan Timko <jantimko16@gmail.com>
8
+ License: MIT
9
+ Project-URL: Homepage, https://github.com/jtimko16/AutoFuzzTS
10
+ Project-URL: Bug Tracker, https://github.com/jtimko16/AutoFuzzTS/issues
11
+ Requires-Python: >=3.11
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: numpy>=1.26.0
15
+ Requires-Dist: pandas>=2.2.0
16
+ Requires-Dist: scikit-learn>=1.5.0
17
+ Requires-Dist: scipy>=1.15.0
18
+ Requires-Dist: xgboost>=3.0.0
19
+ Requires-Dist: lightgbm>=4.6.0
20
+ Requires-Dist: tpot>=1.0.0
21
+ Requires-Dist: optuna>=4.3.0
22
+ Requires-Dist: matplotlib>=3.10.0
23
+ Requires-Dist: seaborn>=0.13.0
24
+ Requires-Dist: requests>=2.32.0
25
+ Requires-Dist: PyYAML>=6.0.0
26
+ Requires-Dist: joblib>=1.4.0
27
+ Requires-Dist: tqdm>=4.67.0
28
+ Dynamic: author
29
+ Dynamic: home-page
30
+ Dynamic: license-file
31
+ Dynamic: requires-python
32
+
33
+ # AutoFuzzTS
34
+
35
+ Automated fuzzy time series forecasting library in Python.
36
+ Build and evaluate time series models automatically using fuzzy logic and AutoML techniques.
37
+
38
+ ## Installation
39
+
40
+ ```bash
41
+ pip install autofuzzts
@@ -0,0 +1,9 @@
1
+ # AutoFuzzTS
2
+
3
+ Automated fuzzy time series forecasting library in Python.
4
+ Build and evaluate time series models automatically using fuzzy logic and AutoML techniques.
5
+
6
+ ## Installation
7
+
8
+ ```bash
9
+ pip install autofuzzts
@@ -0,0 +1 @@
1
+ # __init__.py
@@ -0,0 +1,18 @@
1
# config.py

# Baseline settings shared by every pipeline run; callers override
# individual keys through get_config().
DEFAULT_CONFIG = {
    "n_clusters": 3,
    "number_of_lags": 5,
    "plot_partition": False,
    "pred_column": "Y",
    "fuzzy_part_func": "triangle",
    "n_rows": 0,
    "sigma": 1.0,
    "verbosity": False,
}


def get_config(custom_config=None):
    """Return a configuration dict: the defaults, with any entries from
    *custom_config* layered on top.

    The defaults are copied first, so neither DEFAULT_CONFIG nor the
    caller's dict is mutated.
    """
    merged = dict(DEFAULT_CONFIG)
    if custom_config:
        merged.update(custom_config)
    return merged
@@ -0,0 +1 @@
1
+ # __init__.py
@@ -0,0 +1,7 @@
1
+ import os
2
+ import pandas as pd
3
+
4
def load_sample_data(file_name):
    """Load one of the bundled sample datasets as a pandas DataFrame.

    The file is looked up in the ``sample_datasets`` directory that sits
    next to this module.
    """
    base_dir = os.path.dirname(__file__)
    data_path = os.path.join(base_dir, 'sample_datasets', file_name)
    print(f"Loading data from: {data_path}")  # echo the resolved path for debugging
    return pd.read_csv(data_path)
@@ -0,0 +1 @@
1
+ # __init__.py
@@ -0,0 +1,41 @@
1
+ import pandas as pd
2
+ import warnings
3
+
4
+
5
def validate_and_clean_input(df: pd.DataFrame, covariates: list[str] = None) -> pd.DataFrame:
    """
    Validate the input DataFrame and reduce it to the modelling columns.

    The first column is treated as the target series and renamed to 'Y'.
    When the frame has more than one column:

    * without ``covariates`` — everything but the first column is dropped
      and a warning is issued;
    * with ``covariates`` — the first column plus the named covariate
      columns are kept (a warning still notes the selection).

    All retained columns are coerced to numeric; values that cannot be
    parsed, and pre-existing NaNs, become 0.

    Parameters:
        df (pd.DataFrame): The input DataFrame; column 0 is the target.
        covariates (list[str], optional): Names of additional columns to
            keep as model features. Defaults to None.

    Returns:
        pd.DataFrame: Numeric DataFrame whose first column is named 'Y'.

    Raises:
        ValueError: If ``df`` is not a DataFrame or has no columns.
    """
    if not isinstance(df, pd.DataFrame):
        raise ValueError("Input data must be a pandas DataFrame.")

    if df.shape[1] == 0:
        raise ValueError("Input DataFrame must contain at least one column.")

    # If there are multiple columns, keep only what the caller asked for.
    if df.shape[1] > 1:
        if covariates is None:
            warnings.warn("Input DataFrame has multiple columns. Only the first column will be used.")
            df = df[[df.columns[0]]]
        else:
            warnings.warn("Input DataFrame has multiple columns. Covariates will be used for modelling.")
            # NOTE(review): assumes every covariate name exists in df and the
            # list does not include the first column itself — a missing name
            # raises KeyError, a duplicate produces a repeated column.
            df = df[[df.columns[0]] + covariates]

    # Standardize the target column name to 'Y'.
    # NOTE(review): a covariate already named 'Y' would collide here — confirm upstream.
    df = df.rename(columns={df.columns[0]: 'Y'})

    # Coerce everything to numeric; unparseable entries and NaNs become 0.
    df = df.apply(pd.to_numeric, errors='coerce').fillna(0)

    return df
@@ -0,0 +1 @@
1
+ # __init__.py
@@ -0,0 +1 @@
1
+ # __init__.py
@@ -0,0 +1,83 @@
1
+ import xgboost as xgb
2
+ from typing import Literal
3
+ from tpot import TPOTClassifier
4
+ from autofuzzts.models.mlp_nas import build_model
5
+ from sklearn.calibration import CalibratedClassifierCV
6
+
7
+
8
class FuzzyPipelineModel:
    """
    Wrapper that trains one of several classifier back-ends (XGBoost,
    TPOT, or an Optuna-tuned MLP) for fuzzy time-series classification,
    and optionally calibrates its probability estimates.
    """

    def __init__(self, n_clusters: int, number_of_lags: int, verbosity: bool = False):
        """
        Parameters:
        - n_clusters: Number of fuzzy sets / target classes.
        - number_of_lags: Number of lagged features the model consumes.
        - verbosity: Enable extra output (informational only).
        """
        self.n_clusters = n_clusters
        self.number_of_lags = number_of_lags
        self.verbosity = verbosity
        self.model = None  # Placeholder for the fitted model

    def fit(self, X_train, y_train, model_type: Literal['xgb', 'tpot', 'mlp'] = 'xgb', **kwargs):
        """
        Fit the model based on the specified model type and input parameters.

        Parameters:
        - X_train: Features for training.
        - y_train: Labels for training.
        - model_type: 'xgb', 'tpot', or 'mlp'.
        - kwargs: Additional parameters for model fitting.

        Returns:
        The fitted estimator (also stored on ``self.model``).

        Raises:
        ValueError: If ``model_type`` is not one of the supported values.
        """
        if model_type == 'xgb':
            model = xgb.XGBClassifier(objective="multi:softmax", num_class=self.n_clusters, **kwargs)
            self.model = model.fit(X_train, y_train)

        elif model_type == 'tpot':
            tpot = TPOTClassifier(
                generations=kwargs.get('generations', 5),
                population_size=kwargs.get('population_size', 10),
                random_state=kwargs.get('random_state', 42),
                max_time_mins=kwargs.get('max_time_mins', 2),
            )
            tpot.fit(X_train, y_train)
            # Keep the exported sklearn pipeline rather than the TPOT wrapper.
            self.model = tpot.fitted_pipeline_

        elif model_type == 'mlp':
            # build_model runs the hyperparameter search and returns an
            # MLPClassifier configured with the best parameters; it is
            # fitted on the training data here.
            mlp = build_model(X_train, y_train, **kwargs)
            self.model = mlp.fit(X_train, y_train)

        else:
            raise ValueError("Invalid model type. Choose 'xgb', 'mlp', or 'tpot'.")

        return self.model

    def calibrate(self, X_train, y_train, method='sigmoid', cv=5):
        """
        Calibrate the fitted model using CalibratedClassifierCV.

        Parameters:
        - X_train: Features for training (for calibration).
        - y_train: Labels for training (for calibration).
        - method: Calibration method ('sigmoid' or 'isotonic').
        - cv: Cross-validation splitting strategy.

        Returns:
        The calibrated estimator (replaces ``self.model``).

        Raises:
        ValueError: If no model has been fitted yet.
        AttributeError: If the fitted model lacks ``predict_proba``.
        """
        if self.model is None:
            raise ValueError("Model is not fitted yet. Call 'fit' first.")

        # Calibration requires probability estimates from the base model.
        if not hasattr(self.model, "predict_proba"):
            raise AttributeError("The fitted model does not support predict_proba.")

        calibrated_model = CalibratedClassifierCV(self.model, method=method, cv=cv)
        calibrated_model.fit(X_train, y_train)

        # Replace the model with the calibrated model.
        self.model = calibrated_model

        return self.model

    def predict(self, X_test):
        """Return class predictions from the fitted model."""
        if self.model is None:
            # Fixed: the public method is 'fit', not 'fit_model'.
            raise ValueError("Model is not fitted yet. Call 'fit' first.")
        return self.model.predict(X_test)

    def predict_proba(self, X_test):
        """Return class probability estimates from the fitted model."""
        if self.model is None:
            raise ValueError("Model is not fitted yet. Call 'fit' first.")
        return self.model.predict_proba(X_test)
@@ -0,0 +1,90 @@
1
+ from sklearn.neural_network import MLPClassifier # Assuming you use sklearn's MLP
2
+ from sklearn.model_selection import train_test_split
3
+ import optuna
4
+
5
+
6
def _objective(trial, X, y):
    """
    Optuna objective for MLP hyperparameter tuning (internal function).

    Suggests a network depth and per-layer widths, activation, L2 penalty,
    and initial learning rate, trains an MLPClassifier on an 80/20 split of
    (X, y), and returns the held-out accuracy to be maximized.

    NOTE(review): the order of the trial.suggest_* calls defines the study's
    search space — do not reorder them.
    """
    # Hold out 20% of the data for scoring this trial (fixed seed so every
    # trial scores against the same split).
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Define and train the model with suggested hyperparameters
    clf = MLPClassifier(
        # "n_layers" is drawn first (inside range()), then one width per layer.
        hidden_layer_sizes=tuple(
            trial.suggest_int(f"n_units_{i}", 10, 100)
            for i in range(trial.suggest_int("n_layers", 1, 4))
        ),
        activation=trial.suggest_categorical("activation", ["relu", "tanh"]),
        solver="adam",  # Using only 'adam' solver
        alpha=trial.suggest_float("alpha", 1e-5, 1e-1, log=True),
        learning_rate_init=trial.suggest_float("learning_rate_init", 1e-4, 1e-2, log=True),
        max_iter=200,
        random_state=42,
        early_stopping=True,
        n_iter_no_change=10,
    )
    clf.fit(x_train, y_train)

    # Evaluate the model: mean accuracy on the held-out split.
    return clf.score(x_test, y_test)
37
+
38
def build_model(X, y, n_trials=100):
    """
    Run Optuna hyperparameter tuning and return an MLP classifier
    configured with the best parameters found.

    NOTE: the returned model is NOT fitted — the final MLPClassifier is
    constructed from the best trial's parameters but never trained here;
    callers are expected to call ``.fit(X, y)`` on it themselves.

    Args:
        X: Training data features (used only for the tuning trials).
        y: Training data labels (used only for the tuning trials).
        n_trials: Number of trials for hyperparameter tuning (default: 100).

    Returns:
        An unfitted MLPClassifier configured with the best hyperparameters.
    """
    # Define the neural network structure search space
    study = optuna.create_study(direction="maximize")
    study.optimize(lambda trial: _objective(trial, X, y), n_trials=n_trials)

    # Print best parameters and best score
    print(f"Best parameters: {study.best_params}")
    print(f"Best score: {study.best_value}")

    # Extract best hyperparameters
    n_layers = study.best_params["n_layers"]
    hidden_layer_sizes = tuple(
        [study.best_params[f"n_units_{i}"] for i in range(n_layers)]
    )
    activation = study.best_params["activation"]
    alpha = study.best_params["alpha"]
    learning_rate_init = study.best_params["learning_rate_init"]

    # Print model architecture
    print("Model Architecture:")
    print(f"\tNumber of Layers: {n_layers}")
    print(f"\tHidden Layer Sizes: {hidden_layer_sizes}")
    print(f"\tActivation Function: {activation}")
    print(f"\tL2 Penalty (alpha): {alpha}")
    print(f"\tInitial Learning Rate: {learning_rate_init}")

    # Create the model with best hyperparameters (left unfitted on purpose;
    # see the docstring).
    model = MLPClassifier(
        hidden_layer_sizes=hidden_layer_sizes,
        activation=activation,
        solver="adam",
        alpha=alpha,
        learning_rate_init=learning_rate_init,
        max_iter=200,
        random_state=42,
        early_stopping=True,
        n_iter_no_change=10,
    )

    return model
@@ -0,0 +1 @@
1
+ # __init__.py
@@ -0,0 +1,107 @@
1
+ ## Functions for fuzzy clustering
2
+ import numpy as np
3
+ import pandas as pd
4
+
5
+
6
def fuzzy_partition_cosine(X: pd.Series, n: int):
    """
    Partition X into n evenly spaced fuzzy sets with raised-cosine
    membership functions.

    Midsteps of the calculation:

    D - distance vector: the n set centers, evenly spaced over [min(X), max(X)]
    h - spread of the fuzzy sets (distance between adjacent centers)

    Parameters:
        X: 1-D sequence of values to fuzzify (pd.Series or array-like).
        n: number of fuzzy sets (clusters); must be >= 2.

    Returns:
        D (np.ndarray): the n set centers.
        A (np.ndarray): (len(X), n) membership matrix; row k holds the
            membership of the k-th value in each set.
    """
    # Work on a plain array so element access is positional: indexing a
    # pandas Series with a non-default index via X[k] would raise KeyError.
    x = np.asarray(X)
    n_rows = len(x)
    x_min = x.min()
    x_max = x.max()

    D = np.linspace(x_min, x_max, n)
    h = (D[-1] - D[0]) / (n - 1)

    A = np.zeros((n_rows, n))

    for k in range(n_rows):
        # First column
        if (D[0] <= x[k]) and (x[k] <= D[1]):
            A[k, 0] = 0.5 * (np.cos(np.pi * (x[k] - D[0]) / h) + 1)

        # Last column
        elif (D[n - 2] <= x[k]) and (x[k] <= D[n - 1]):
            A[k, n - 1] = 0.5 * (np.cos(np.pi * (x[k] - D[n - 1]) / h) + 1)

        # All other columns
        for j in range(1, n - 1):
            if (D[j - 1] <= x[k]) and (x[k] <= D[j + 1]):
                A[k, j] = 0.5 * (np.cos(np.pi * (x[k] - D[j]) / h) + 1)

    return D, A
38
+
39
+
40
def fuzzy_partition_triangle(X: pd.Series, n: int):
    """
    Partition X into n evenly spaced triangular fuzzy sets.

    Midsteps of the calculation:

    D - distance vector: the n set centers, evenly spaced over [min(X), max(X)]
    h - spread of the fuzzy sets (distance between adjacent centers)

    Parameters:
        X: 1-D sequence of values to fuzzify (pd.Series or array-like).
        n: number of fuzzy sets (clusters); must be >= 2.

    Returns:
        D (np.ndarray): the n set centers.
        A (np.ndarray): (len(X), n) membership matrix; row k holds the
            membership of the k-th value in each set.
    """
    # Work on a plain array so element access is positional: indexing a
    # pandas Series with a non-default index via X[k] would raise KeyError.
    x = np.asarray(X)
    n_rows = len(x)
    x_min = x.min()
    x_max = x.max()

    D = np.linspace(x_min, x_max, n)
    h = (D[-1] - D[0]) / (n - 1)

    A = np.zeros((n_rows, n))

    for k in range(n_rows):
        # First column
        if (D[0] <= x[k]) and (x[k] <= D[1]):
            A[k, 0] = (D[1] - x[k]) / h

        # Last column
        elif (D[n - 2] <= x[k]) and (x[k] <= D[n - 1]):
            A[k, n - 1] = (x[k] - D[n - 2]) / h

        # All other columns: rising edge up to the center, falling edge after
        for j in range(1, n - 1):
            if (D[j - 1] <= x[k]) and (x[k] <= D[j]):
                A[k, j] = (x[k] - D[j - 1]) / h

            if (D[j] <= x[k]) and (x[k] <= D[j + 1]):
                A[k, j] = (D[j + 1] - x[k]) / h

    return D, A
75
+
76
+
77
def fuzzy_partition_gauss(X: pd.Series, n: int, sigma: float = 1):
    """
    Partition X into n evenly spaced Gaussian fuzzy sets.

    Midsteps of the calculation:

    D - distance vector: the n set centers, evenly spaced over [min(X), max(X)]
    sigma - standard deviation of the Gaussian membership functions

    Parameters:
        X: 1-D sequence of values to fuzzify (pd.Series or array-like).
        n: number of fuzzy sets (clusters); must be >= 2.
        sigma: width of each Gaussian set (default 1).

    Returns:
        D (np.ndarray): the n set centers.
        A (np.ndarray): (len(X), n) membership matrix; row k holds the
            membership of the k-th value in each set.
    """
    # Work on a plain array so element access is positional: indexing a
    # pandas Series with a non-default index via X[k] would raise KeyError.
    x = np.asarray(X)
    n_rows = len(x)
    x_min = x.min()
    x_max = x.max()

    D = np.linspace(x_min, x_max, n)
    A = np.zeros((n_rows, n))

    for k in range(n_rows):
        # First column
        if (D[0] <= x[k]) and (x[k] <= D[1]):
            A[k, 0] = np.exp(-((x[k] - D[0]) ** 2) / (2 * sigma**2))

        # Last column
        elif (D[n - 2] <= x[k]) and (x[k] <= D[n - 1]):
            A[k, n - 1] = np.exp(-((x[k] - D[n - 1]) ** 2) / (2 * sigma**2))

        # All other columns
        for j in range(1, n - 1):
            if (D[j - 1] <= x[k]) and (x[k] <= D[j + 1]):
                A[k, j] = np.exp(-((x[k] - D[j]) ** 2) / (2 * sigma**2))

    return D, A
@@ -0,0 +1,129 @@
1
+ ## Functions for fuzzy clustering
2
+ import numpy as np
3
+ import pandas as pd
4
+
5
+
6
def fuzzy_partition_cosine(X: pd.Series, n:float):
    '''
    Legacy raised-cosine fuzzy partition.

    Unlike the refactored version in fuzzy_clust_fun.py, this one:
      * sorts X before computing memberships, so row k of A corresponds to
        the k-th SMALLEST value, not the k-th input row;
      * anchors the centers at 0 .. (max(X) - min(X)) rather than at the
        data's actual min/max.

    Midsteps of the calculation:

    D - distance vector (D) represents the relative position of each data point within the partition
        (shape (n, 1))
    h - height, spread of the fuzzy sets

    Returns the centers D and the (len(X), n) membership matrix A.
    '''

    n_rows = len(X)
    x_spread = X.max() - X.min() # spread of the data

    D = np.zeros((n,1))
    for i in range(0,n):
        D[i] = i/(n-1)*x_spread # D is adjusted by the x_spread
    h = (D[-1]-D[0])/(n-1)

    A = np.zeros((n_rows,n))

    x_sorted = np.sort(X) # sort the data

    for k in range(0,n_rows):
        # First set: raised cosine centered on D[0]
        if (D[0] <= x_sorted[k]) and (x_sorted[k] <= D[1]):
            A[k, 0] = 0.5*(np.cos(np.pi*(x_sorted[k]-D[0])/h)+1)
        else:
            # Last set: raised cosine centered on D[n-1]
            if (D[n - 2] <= x_sorted[k]) and (x_sorted[k] <= D[n-1]):
                A[k, n-1] = 0.5*(np.cos(np.pi*(x_sorted[k]-D[n-1])/h)+1)
        # Interior sets: any point within one spread of center D[j]
        for j in range(1,n-1):
            if (D[j - 1] <= x_sorted[k]) and (x_sorted[k] <= D[j+1]):
                A[k,j]=0.5*(np.cos(np.pi*(x_sorted[k]-D[j])/h)+1)

    return D,A
39
+
40
+
41
+
42
+
43
+
44
def fuzzy_partition_triangle(X: pd.Series, n:float):
    '''
    Legacy triangular fuzzy partition.

    Unlike the refactored version in fuzzy_clust_fun.py, this one:
      * sorts X before computing memberships, so row k of A corresponds to
        the k-th SMALLEST value, not the k-th input row;
      * anchors the centers at 0 .. (max(X) - min(X)) rather than at the
        data's actual min/max.

    Midsteps of the calculation:

    D - distance vector (D) represents the relative position of each data point within the partition
        (shape (n, 1))
    h - height, spread of the fuzzy sets

    Returns the centers D and the (len(X), n) membership matrix A.
    '''

    n_rows = len(X)
    x_spread = X.max() - X.min() # spread of the data

    D = np.zeros((n,1))
    for i in range(0,n):
        D[i] = i/(n-1)*x_spread # D is adjusted by the x_spread
    h = (D[-1]-D[0])/(n-1)

    A = np.zeros((n_rows,n))

    x_sorted = np.sort(X) # sort the data

    for k in range(0,n_rows):

        # First column
        if (D[0] <= x_sorted[k]) and (x_sorted[k] <= D[1]):
            A[k, 0] = (D[1]-x_sorted[k])/h

        # Last column
        else:
            if (D[n - 2] <= x_sorted[k]) and (x_sorted[k] <= D[n-1]):
                A[k, n-1] = (x_sorted[k]-D[n-2])/h

        # All other columns: rising edge up to D[j], falling edge after
        for j in range(1,n-1):
            if (D[j - 1] <= x_sorted[k]) and (x_sorted[k]<= D[j]):
                A[k,j] = (x_sorted[k]-D[j-1])/h

            if (D[j] <= x_sorted[k]) and (x_sorted[k] <= D[j+1]):
                A[k,j] = (D[j+1]-x_sorted[k])/h

    return D,A
86
+
87
+
88
def fuzzy_partition_gauss(X: pd.Series, n: float, sigma: float = 1):
    '''
    Legacy Gaussian fuzzy partition.

    Unlike the refactored version in fuzzy_clust_fun.py, this one sorts X
    first (row k of A corresponds to the k-th SMALLEST value) and anchors
    the centers at 0 .. (max(X) - min(X)).

    Midsteps of the calculation:

    D - distance vector (D) represents the relative position of each data point within the partition
        (shape (n, 1))
    h - height, spread of the fuzzy sets (computed for parity with the
        other partition functions; unused by the Gaussian formula)
    sigma - standard deviation of the Gaussian membership functions

    Returns the centers D and the (len(X), n) membership matrix A.
    '''

    n_rows = len(X)
    x_spread = X.max() - X.min()  # spread of the data

    D = np.zeros((n, 1))
    for i in range(0, n):
        D[i] = i / (n - 1) * x_spread  # D is adjusted by the x_spread
    h = (D[-1] - D[0]) / (n - 1)

    A = np.zeros((n_rows, n))

    x_sorted = np.sort(X)  # sort the data

    # Flat view of the centers so element access yields scalars rather than
    # shape-(1,) arrays (D itself is returned unchanged).
    d = D.ravel()

    for k in range(0, n_rows):

        # First column
        if (d[0] <= x_sorted[k]) and (x_sorted[k] <= d[1]):
            A[k, 0] = np.exp(-(x_sorted[k] - d[0]) ** 2 / (2 * sigma ** 2))

        # Last column
        else:
            # BUG FIX: the original compared the loop index k against the
            # center (D[n-2] <= k) instead of the data value x_sorted[k].
            if (d[n - 2] <= x_sorted[k]) and (x_sorted[k] <= d[n - 1]):
                A[k, n - 1] = np.exp(-(x_sorted[k] - d[n - 1]) ** 2 / (2 * sigma ** 2))

        # All other columns
        for j in range(1, n - 1):
            if (d[j - 1] <= x_sorted[k]) and (x_sorted[k] <= d[j + 1]):
                A[k, j] = np.exp(-(x_sorted[k] - d[j]) ** 2 / (2 * sigma ** 2))

    return D, A
@@ -0,0 +1,110 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from typing import Union, Literal
4
+ import warnings
5
+ from sklearn.preprocessing import MinMaxScaler
6
+
7
+ from autofuzzts.partition.fuzzy_clust_fun import (
8
+ fuzzy_partition_cosine,
9
+ fuzzy_partition_triangle,
10
+ fuzzy_partition_gauss,
11
+ )
12
+
13
class FuzzyPartition:
    """
    Wraps one of the fuzzy partition functions (cosine / triangle / gauss)
    and turns its raw (centers, memberships) output into a tidy per-row
    DataFrame plus scaled and unscaled cluster centers.
    """

    def __init__(self, fuzzy_function: Literal["cosine", "triangle", "gauss"], n_clusters: int, sigma: float, scaler: MinMaxScaler, verbosity: bool = False):
        # Resolve the partition-function name to the actual callable.
        self.fuzzy_function = self._get_fuzzy_partition_func(fuzzy_function)
        self.n_clusters = n_clusters
        self.sigma = sigma  # only used by the Gaussian partition
        self.verbosity = verbosity
        self.scaler = scaler

        # Only warns — a None scaler will later fail with AttributeError in
        # fuzzy_partition() when inverse_transform is called.
        if scaler is None:  # Check if scaler is None
            warnings.warn("Scaler must be provided for inverse transformation.")

    def _get_fuzzy_partition_func(self, fuzzy_part_func: Union[str, None]):
        """Map a partition-function name to its implementation;
        unknown names (including None) silently fall back to cosine."""
        if fuzzy_part_func == "cosine":
            return fuzzy_partition_cosine
        elif fuzzy_part_func == "triangle":
            return fuzzy_partition_triangle
        elif fuzzy_part_func == "gauss":
            return fuzzy_partition_gauss
        else:
            return fuzzy_partition_cosine  # Default function

    def fuzzy_partition(self, X: np.ndarray) -> tuple:
        """
        Perform fuzzy partitioning on the target variable X.

        Parameters:
            X (np.ndarray): Input data to be partitioned (assumed already
                scaled by ``self.scaler`` — TODO confirm with callers).

        Returns:
            tuple:
                fp_df (pd.DataFrame): per-row results with columns
                    ['X_value', 'membership_value', 'cluster', 'left'].
                center_points (np.ndarray): cluster centers rounded to
                    2 decimals (scaled space).
                center_points_unscaled (np.ndarray): centers mapped back to
                    the original scale via ``scaler.inverse_transform``.
        """
        # Perform fuzzy partitioning using the selected function; only the
        # Gaussian variant accepts a sigma argument.
        if self.fuzzy_function.__name__ == "fuzzy_partition_gauss":
            D, A = self.fuzzy_function(X=X, n=self.n_clusters, sigma=self.sigma)
        else:
            D, A = self.fuzzy_function(X=X, n=self.n_clusters)

        # Round centers to 2 decimals for readability/printing.
        center_points = list(D.flatten())
        center_points = [round(i, 2) for i in center_points]
        center_points = np.array(center_points)

        if self.verbosity:
            print("Cluster center points:", center_points)

        # Unscaled center points (original data scale).
        center_points_unscaled = self.scaler.inverse_transform(
            center_points.reshape(-1, 1)
        )
        self.center_points_unscaled = center_points_unscaled.flatten()
        if self.verbosity:
            print("Cluster center points unscaled:", self.center_points_unscaled.flatten())

        # Create a DataFrame for membership values, one column per fuzzy set.
        A_df = pd.DataFrame(A)
        A_df.columns = ["set_" + str(i) for i in range(A_df.shape[1])]

        # Prepare the fuzzy partition DataFrame.
        fp_df = A_df.copy()
        fp_df.insert(0, "X_value", X)
        # Strongest membership per row; idxmax scans set_* columns first, so
        # ties resolve to a set_* name even though 'membership_value' is also
        # inside the iloc[:, 1:] slice on the next line.
        fp_df["membership_value"] = fp_df.iloc[:, 1:].max(axis=1)
        fp_df["cluster"] = fp_df.iloc[:, 1:].idxmax(axis=1)

        # Initialize 'left' and 'right' columns
        fp_df["left"] = 0
        fp_df["right"] = 0

        # Define sets for left and right logic
        set_min = "set_0"
        set_max = "set_" + str(len(center_points) - 1)

        # Edge sets can only move one way: lowest set looks right, highest left.
        fp_df.loc[fp_df["cluster"] == set_min, "right"] = 1
        fp_df.loc[fp_df["cluster"] == set_max, "left"] = 1

        # NOTE(review): edge clusters get hard-coded 0/1 here rather than
        # center_points[0]/center_points[-1] — presumably X is scaled to
        # [0, 1]; confirm upstream.
        fp_df["center_point"] = ""
        fp_df.loc[fp_df["cluster"] == set_min, "center_point"] = 0
        fp_df.loc[fp_df["cluster"] == set_max, "center_point"] = 1

        # Logic for intermediate clusters: side is chosen by whether the
        # value sits at/above or below its cluster's center.
        for i in range(1, len(center_points) - 1):
            set_i = "set_" + str(i)
            fp_df.loc[fp_df["cluster"] == set_i, "center_point"] = center_points[i]
            fp_df.loc[
                (fp_df["cluster"] == set_i) & (fp_df["X_value"] >= center_points[i]),
                "right",
            ] = 1
            fp_df.loc[
                (fp_df["cluster"] == set_i) & (fp_df["X_value"] < center_points[i]),
                "left",
            ] = 1

        # Ensure membership values are non-negative
        fp_df.loc[fp_df["membership_value"] < 0, "membership_value"] = 0

        # Keep only relevant columns.
        # NOTE(review): 'right' and 'center_point' are computed above but
        # dropped here — confirm they are intentionally unused downstream.
        fp_df = fp_df.loc[:, ["X_value", "membership_value", "cluster", "left"]]

        return fp_df, center_points, center_points_unscaled.flatten()