autofuzzts 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autofuzzts/__init__.py +1 -0
- autofuzzts/config.py +18 -0
- autofuzzts/data/__init__.py +1 -0
- autofuzzts/data/data_loader.py +7 -0
- autofuzzts/data_validation/__init__.py +1 -0
- autofuzzts/data_validation/validate.py +41 -0
- autofuzzts/evaluation/__init__.py +1 -0
- autofuzzts/models/__init__.py +1 -0
- autofuzzts/models/fuzzy_classifier.py +83 -0
- autofuzzts/models/mlp_nas.py +90 -0
- autofuzzts/partition/__init__.py +1 -0
- autofuzzts/partition/fuzzy_clust_fun.py +107 -0
- autofuzzts/partition/fuzzy_clust_fun_orig.py +129 -0
- autofuzzts/partition/partition.py +110 -0
- autofuzzts/partition/visualize_partition.py +32 -0
- autofuzzts/pipeline.py +383 -0
- autofuzzts/preprocess/__init__.py +1 -0
- autofuzzts/preprocess/prep_for_model.py +70 -0
- autofuzzts/preprocess/preprocess.py +63 -0
- autofuzzts/utils.py +1 -0
- autofuzzts-0.1.0.dist-info/METADATA +41 -0
- autofuzzts-0.1.0.dist-info/RECORD +25 -0
- autofuzzts-0.1.0.dist-info/WHEEL +5 -0
- autofuzzts-0.1.0.dist-info/licenses/LICENSE +21 -0
- autofuzzts-0.1.0.dist-info/top_level.txt +1 -0
autofuzzts/__init__.py
ADDED
@@ -0,0 +1 @@
+# __init__.py
autofuzzts/config.py
ADDED
@@ -0,0 +1,18 @@
+# config.py
+
+DEFAULT_CONFIG = {
+    "n_clusters": 3,
+    "number_of_lags": 5,
+    "plot_partition": False,
+    "pred_column": "Y",
+    "fuzzy_part_func": "triangle",
+    "n_rows": 0,
+    "sigma": 1.0,
+    "verbosity": False,
+}
+
+def get_config(custom_config=None):
+    config = DEFAULT_CONFIG.copy()
+    if custom_config:
+        config.update(custom_config)
+    return config
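For reference, a small sketch of how the merge in `get_config` behaves; the import path follows the package layout above, and the expected values follow `DEFAULT_CONFIG`:

```python
from autofuzzts.config import get_config

# Defaults only
cfg = get_config()
assert cfg["n_clusters"] == 3 and cfg["fuzzy_part_func"] == "triangle"

# Overrides are shallow-merged on top of DEFAULT_CONFIG
cfg = get_config({"n_clusters": 7, "verbosity": True})
assert cfg["n_clusters"] == 7       # overridden
assert cfg["number_of_lags"] == 5   # untouched default
```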
autofuzzts/data/__init__.py
ADDED
@@ -0,0 +1 @@
+# __init__.py
autofuzzts/data_validation/__init__.py
ADDED
@@ -0,0 +1 @@
+# __init__.py
autofuzzts/data_validation/validate.py
ADDED
@@ -0,0 +1,41 @@
+import pandas as pd
+import warnings
+
+
+def validate_and_clean_input(df: pd.DataFrame, covariates: list[str] = None) -> pd.DataFrame:
+    """
+    Validates the input DataFrame, ensuring it contains at least one column.
+    If the DataFrame has multiple columns and no covariates are given, all
+    columns except the first are removed and a warning is issued; if
+    covariates are given, they are retained alongside the first column.
+
+    Parameters:
+        df (pd.DataFrame): The input DataFrame.
+        covariates (list[str], optional): Covariate columns to keep.
+
+    Returns:
+        pd.DataFrame: A cleaned DataFrame whose first column is renamed 'Y'.
+    """
+    if not isinstance(df, pd.DataFrame):
+        raise ValueError("Input data must be a pandas DataFrame.")
+
+    if df.shape[1] == 0:
+        raise ValueError("Input DataFrame must contain at least one column.")
+
+    # If there are multiple columns, keep only what is needed and warn the user
+    if df.shape[1] > 1:
+        if covariates is None:
+            warnings.warn("Input DataFrame has multiple columns. Only the first column will be used.")
+            df = df[[df.columns[0]]]
+        else:
+            warnings.warn("Input DataFrame has multiple columns. Covariates will be used for modelling.")
+            df = df[[df.columns[0]] + covariates]
+
+    # Standardize the target column name to 'Y'
+    df = df.rename(columns={df.columns[0]: 'Y'})
+
+    # Convert all selected columns to numeric and fill NaNs with 0
+    df = df.apply(pd.to_numeric, errors='coerce').fillna(0)
+
+    return df
+
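A small, hypothetical example of the cleaning behaviour above; the sample frame and its column names are invented for illustration:

```python
import pandas as pd
import warnings
from autofuzzts.data_validation.validate import validate_and_clean_input

raw = pd.DataFrame({"price": [1.0, 2.0, None], "volume": [10, 20, 30]})

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # the extra 'volume' column triggers a warning
    clean = validate_and_clean_input(raw)

print(clean.columns.tolist())  # ['Y'] (first column kept and renamed)
print(clean["Y"].tolist())     # [1.0, 2.0, 0.0] (NaN filled with 0)
```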
autofuzzts/evaluation/__init__.py
ADDED
@@ -0,0 +1 @@
+# __init__.py
autofuzzts/models/__init__.py
ADDED
@@ -0,0 +1 @@
+# __init__.py
autofuzzts/models/fuzzy_classifier.py
ADDED
@@ -0,0 +1,83 @@
+import xgboost as xgb
+from typing import Literal
+from tpot import TPOTClassifier
+from autofuzzts.models.mlp_nas import build_model
+from sklearn.calibration import CalibratedClassifierCV
+
+
+class FuzzyPipelineModel:
+    def __init__(self, n_clusters: int, number_of_lags: int, verbosity: bool = False):
+        self.n_clusters = n_clusters
+        self.number_of_lags = number_of_lags
+        self.verbosity = verbosity
+        self.model = None  # Placeholder for the fitted model
+
+    def fit(self, X_train, y_train, model_type: Literal['xgb', 'tpot', 'mlp'] = 'xgb', **kwargs):
+        """
+        Fit the model based on the specified model type and input parameters.
+
+        Parameters:
+        - X_train: Features for training.
+        - y_train: Labels for training.
+        - model_type: 'xgb', 'mlp', or 'tpot'.
+        - kwargs: Additional parameters for model fitting.
+        """
+        if model_type == 'xgb':
+            model = xgb.XGBClassifier(objective="multi:softmax", num_class=self.n_clusters, **kwargs)
+            self.model = model.fit(X_train, y_train)
+
+        elif model_type == 'tpot':
+            tpot = TPOTClassifier(
+                generations=kwargs.get('generations', 5),
+                population_size=kwargs.get('population_size', 10),
+                random_state=kwargs.get('random_state', 42),
+                max_time_mins=kwargs.get('max_time_mins', 2),
+            )
+            tpot.fit(X_train, y_train)
+            self.model = tpot.fitted_pipeline_
+
+        elif model_type == 'mlp':
+            mlp = build_model(X_train, y_train, **kwargs)  # NAS via Optuna; returns an unfitted MLP
+            self.model = mlp.fit(X_train, y_train)
+
+        else:
+            raise ValueError("Invalid model type. Choose 'xgb', 'mlp', or 'tpot'.")
+
+        return self.model
+
+    def calibrate(self, X_train, y_train, method='sigmoid', cv=5):
+        """
+        Calibrate the fitted model using CalibratedClassifierCV.
+
+        Parameters:
+        - X_train: Features for calibration.
+        - y_train: Labels for calibration.
+        - method: Calibration method ('sigmoid' or 'isotonic').
+        - cv: Cross-validation splitting strategy.
+        """
+        if self.model is None:
+            raise ValueError("Model is not fitted yet. Call 'fit' first.")
+
+        # Ensure the model supports predict_proba
+        if not hasattr(self.model, "predict_proba"):
+            raise AttributeError("The fitted model does not support predict_proba.")
+
+        # Initialize and fit the calibrated model
+        calibrated_model = CalibratedClassifierCV(self.model, method=method, cv=cv)
+        calibrated_model.fit(X_train, y_train)
+
+        # Replace the model with the calibrated model
+        self.model = calibrated_model
+
+        return self.model
+
+    def predict(self, X_test):
+        """Make class predictions using the fitted model."""
+        if self.model is None:
+            raise ValueError("Model is not fitted yet. Call 'fit' first.")
+        return self.model.predict(X_test)
+
+    def predict_proba(self, X_test):
+        """Return class probabilities from the fitted model."""
+        if self.model is None:
+            raise ValueError("Model is not fitted yet. Call 'fit' first.")
+        return self.model.predict_proba(X_test)
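A hypothetical smoke test of the wrapper above; the toy arrays, seed, and shapes are invented, and with real data X would hold the lagged fuzzy features and y the cluster labels:

```python
import numpy as np
from autofuzzts.models.fuzzy_classifier import FuzzyPipelineModel

# Toy data: 3 classes (cluster labels 0..2), 4 lag features
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 4))
y = rng.integers(0, 3, size=200)

model = FuzzyPipelineModel(n_clusters=3, number_of_lags=4)
model.fit(X, y, model_type='xgb')        # XGBoost multi-class classifier
model.calibrate(X, y, method='sigmoid')  # wraps it in CalibratedClassifierCV

print(model.predict(X[:5]))
print(model.predict_proba(X[:5]).shape)  # (5, 3)
```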
autofuzzts/models/mlp_nas.py
ADDED
@@ -0,0 +1,90 @@
+from sklearn.neural_network import MLPClassifier
+from sklearn.model_selection import train_test_split
+import optuna
+
+
+def _objective(trial, X, y):
+    """
+    Objective function used for hyperparameter tuning (internal).
+    """
+    # Split data
+    x_train, x_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+
+    # Define and train the model with suggested hyperparameters
+    clf = MLPClassifier(
+        hidden_layer_sizes=tuple(
+            trial.suggest_int(f"n_units_{i}", 10, 100)
+            for i in range(trial.suggest_int("n_layers", 1, 4))
+        ),
+        activation=trial.suggest_categorical("activation", ["relu", "tanh"]),
+        solver="adam",  # Using only the 'adam' solver
+        alpha=trial.suggest_float("alpha", 1e-5, 1e-1, log=True),
+        learning_rate_init=trial.suggest_float("learning_rate_init", 1e-4, 1e-2, log=True),
+        max_iter=200,
+        random_state=42,
+        early_stopping=True,
+        n_iter_no_change=10,
+    )
+    clf.fit(x_train, y_train)
+
+    # Evaluate the model
+    return clf.score(x_test, y_test)
+
+
+def build_model(X, y, n_trials=100):
+    """
+    Performs hyperparameter tuning and returns an MLP classifier configured
+    with the best hyperparameters found. Note that the returned model is not
+    yet fitted.
+
+    Args:
+        X: Training data features.
+        y: Training data labels.
+        n_trials: Number of trials for hyperparameter tuning (default: 100).
+
+    Returns:
+        An MLPClassifier configured with the best hyperparameters.
+    """
+    # Search the neural network structure space
+    study = optuna.create_study(direction="maximize")
+    study.optimize(lambda trial: _objective(trial, X, y), n_trials=n_trials)
+
+    # Print best parameters and best score
+    print(f"Best parameters: {study.best_params}")
+    print(f"Best score: {study.best_value}")
+
+    # Extract best hyperparameters
+    n_layers = study.best_params["n_layers"]
+    hidden_layer_sizes = tuple(
+        study.best_params[f"n_units_{i}"] for i in range(n_layers)
+    )
+    activation = study.best_params["activation"]
+    alpha = study.best_params["alpha"]
+    learning_rate_init = study.best_params["learning_rate_init"]
+
+    # Print model architecture
+    print("Model Architecture:")
+    print(f"\tNumber of Layers: {n_layers}")
+    print(f"\tHidden Layer Sizes: {hidden_layer_sizes}")
+    print(f"\tActivation Function: {activation}")
+    print(f"\tL2 Penalty (alpha): {alpha}")
+    print(f"\tInitial Learning Rate: {learning_rate_init}")
+
+    # Create the model with the best hyperparameters
+    model = MLPClassifier(
+        hidden_layer_sizes=hidden_layer_sizes,
+        activation=activation,
+        solver="adam",
+        alpha=alpha,
+        learning_rate_init=learning_rate_init,
+        max_iter=200,
+        random_state=42,
+        early_stopping=True,
+        n_iter_no_change=10,
+    )
+
+    return model
+
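A sketch of how `build_model` might be exercised; per the code above it returns a configured but *unfitted* MLPClassifier, so a final fit is still needed. The `make_classification` data and the tiny trial count are stand-ins:

```python
from sklearn.datasets import make_classification
from autofuzzts.models.mlp_nas import build_model

X, y = make_classification(n_samples=300, n_features=8, n_classes=3,
                           n_informative=5, random_state=0)

# Keep n_trials small for a quick sanity check; the default of 100 is slow
mlp = build_model(X, y, n_trials=5)

# Final fit on the full data, since build_model only configures the model
mlp.fit(X, y)
print(mlp.score(X, y))
```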
autofuzzts/partition/__init__.py
ADDED
@@ -0,0 +1 @@
+# __init__.py
autofuzzts/partition/fuzzy_clust_fun.py
ADDED
@@ -0,0 +1,107 @@
+## Functions for fuzzy clustering
+import numpy as np
+import pandas as pd
+
+
+def fuzzy_partition_cosine(X: pd.Series, n: int):
+    """
+    Cosine-shaped fuzzy partition of X into n fuzzy sets.
+
+    Intermediate quantities:
+    D - vector of evenly spaced set centers spanning the range of X
+    h - spacing between adjacent centers (the spread of the fuzzy sets)
+    """
+    n_rows = len(X)
+    x_min = X.min()
+    x_max = X.max()
+
+    D = np.linspace(x_min, x_max, n)
+    h = (D[-1] - D[0]) / (n - 1)
+
+    A = np.zeros((n_rows, n))
+
+    for k in range(n_rows):
+        # First column
+        if (D[0] <= X[k]) and (X[k] <= D[1]):
+            A[k, 0] = 0.5 * (np.cos(np.pi * (X[k] - D[0]) / h) + 1)
+
+        # Last column
+        elif (D[n - 2] <= X[k]) and (X[k] <= D[n - 1]):
+            A[k, n - 1] = 0.5 * (np.cos(np.pi * (X[k] - D[n - 1]) / h) + 1)
+
+        # All other columns
+        for j in range(1, n - 1):
+            if (D[j - 1] <= X[k]) and (X[k] <= D[j + 1]):
+                A[k, j] = 0.5 * (np.cos(np.pi * (X[k] - D[j]) / h) + 1)
+
+    return D, A
+
+
+def fuzzy_partition_triangle(X: pd.Series, n: int):
+    """
+    Triangular fuzzy partition of X into n fuzzy sets.
+
+    Intermediate quantities:
+    D - vector of evenly spaced set centers spanning the range of X
+    h - spacing between adjacent centers (the spread of the fuzzy sets)
+    """
+    n_rows = len(X)
+    x_min = X.min()
+    x_max = X.max()
+
+    D = np.linspace(x_min, x_max, n)
+    h = (D[-1] - D[0]) / (n - 1)
+
+    A = np.zeros((n_rows, n))
+
+    for k in range(n_rows):
+        # First column
+        if (D[0] <= X[k]) and (X[k] <= D[1]):
+            A[k, 0] = (D[1] - X[k]) / h
+
+        # Last column
+        elif (D[n - 2] <= X[k]) and (X[k] <= D[n - 1]):
+            A[k, n - 1] = (X[k] - D[n - 2]) / h
+
+        # All other columns
+        for j in range(1, n - 1):
+            if (D[j - 1] <= X[k]) and (X[k] <= D[j]):
+                A[k, j] = (X[k] - D[j - 1]) / h
+
+            if (D[j] <= X[k]) and (X[k] <= D[j + 1]):
+                A[k, j] = (D[j + 1] - X[k]) / h
+
+    return D, A
+
+
+def fuzzy_partition_gauss(X: pd.Series, n: int, sigma: float = 1):
+    """
+    Gaussian fuzzy partition of X into n fuzzy sets.
+
+    Intermediate quantities:
+    D - vector of evenly spaced set centers spanning the range of X
+    sigma - standard deviation of the Gaussian membership functions
+    """
+    n_rows = len(X)
+    x_min = X.min()
+    x_max = X.max()
+
+    D = np.linspace(x_min, x_max, n)
+    A = np.zeros((n_rows, n))
+
+    for k in range(n_rows):
+        # First column
+        if (D[0] <= X[k]) and (X[k] <= D[1]):
+            A[k, 0] = np.exp(-((X[k] - D[0]) ** 2) / (2 * sigma**2))
+
+        # Last column
+        elif (D[n - 2] <= X[k]) and (X[k] <= D[n - 1]):
+            A[k, n - 1] = np.exp(-((X[k] - D[n - 1]) ** 2) / (2 * sigma**2))
+
+        # All other columns
+        for j in range(1, n - 1):
+            if (D[j - 1] <= X[k]) and (X[k] <= D[j + 1]):
+                A[k, j] = np.exp(-((X[k] - D[j]) ** 2) / (2 * sigma**2))
+
+    return D, A
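A quick numeric check of the triangular partition above; the input values are illustrative:

```python
import numpy as np
import pandas as pd
from autofuzzts.partition.fuzzy_clust_fun import fuzzy_partition_triangle

X = pd.Series(np.linspace(0.0, 1.0, 11))  # evenly spaced sample values
D, A = fuzzy_partition_triangle(X, n=3)

print(D)        # three evenly spaced centers: [0.  0.5 1. ]
print(A.shape)  # (11, 3), one membership column per fuzzy set
print(A[5])     # the midpoint belongs fully to the middle set: [0. 1. 0.]
```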
autofuzzts/partition/fuzzy_clust_fun_orig.py
ADDED
@@ -0,0 +1,129 @@
+## Functions for fuzzy clustering (original implementations, kept for reference)
+import numpy as np
+import pandas as pd
+
+
+def fuzzy_partition_cosine(X: pd.Series, n: int):
+    '''
+    Cosine-shaped fuzzy partition (original implementation).
+
+    Intermediate quantities:
+    D - vector of set centers spanning the spread of the data
+    h - spacing between adjacent centers (the spread of the fuzzy sets)
+    '''
+    n_rows = len(X)
+    x_spread = X.max() - X.min()  # spread of the data
+
+    D = np.zeros((n, 1))
+    for i in range(0, n):
+        D[i] = i / (n - 1) * x_spread  # D is adjusted by the x_spread
+    h = (D[-1] - D[0]) / (n - 1)
+
+    A = np.zeros((n_rows, n))
+
+    x_sorted = np.sort(X)  # sort the data
+
+    for k in range(0, n_rows):
+        if (D[0] <= x_sorted[k]) and (x_sorted[k] <= D[1]):
+            A[k, 0] = 0.5 * (np.cos(np.pi * (x_sorted[k] - D[0]) / h) + 1)
+        elif (D[n - 2] <= x_sorted[k]) and (x_sorted[k] <= D[n - 1]):
+            A[k, n - 1] = 0.5 * (np.cos(np.pi * (x_sorted[k] - D[n - 1]) / h) + 1)
+        for j in range(1, n - 1):
+            if (D[j - 1] <= x_sorted[k]) and (x_sorted[k] <= D[j + 1]):
+                A[k, j] = 0.5 * (np.cos(np.pi * (x_sorted[k] - D[j]) / h) + 1)
+
+    return D, A
+
+
+def fuzzy_partition_triangle(X: pd.Series, n: int):
+    '''
+    Triangular fuzzy partition (original implementation).
+
+    Intermediate quantities:
+    D - vector of set centers spanning the spread of the data
+    h - spacing between adjacent centers (the spread of the fuzzy sets)
+    '''
+    n_rows = len(X)
+    x_spread = X.max() - X.min()  # spread of the data
+
+    D = np.zeros((n, 1))
+    for i in range(0, n):
+        D[i] = i / (n - 1) * x_spread  # D is adjusted by the x_spread
+    h = (D[-1] - D[0]) / (n - 1)
+
+    A = np.zeros((n_rows, n))
+
+    x_sorted = np.sort(X)  # sort the data
+
+    for k in range(0, n_rows):
+        # First column
+        if (D[0] <= x_sorted[k]) and (x_sorted[k] <= D[1]):
+            A[k, 0] = (D[1] - x_sorted[k]) / h
+
+        # Last column
+        elif (D[n - 2] <= x_sorted[k]) and (x_sorted[k] <= D[n - 1]):
+            A[k, n - 1] = (x_sorted[k] - D[n - 2]) / h
+
+        # All other columns
+        for j in range(1, n - 1):
+            if (D[j - 1] <= x_sorted[k]) and (x_sorted[k] <= D[j]):
+                A[k, j] = (x_sorted[k] - D[j - 1]) / h
+
+            if (D[j] <= x_sorted[k]) and (x_sorted[k] <= D[j + 1]):
+                A[k, j] = (D[j + 1] - x_sorted[k]) / h
+
+    return D, A
+
+
+def fuzzy_partition_gauss(X: pd.Series, n: int, sigma: float = 1):
+    '''
+    Gaussian fuzzy partition (original implementation).
+
+    Intermediate quantities:
+    D - vector of set centers spanning the spread of the data
+    sigma - standard deviation of the Gaussian membership functions
+    '''
+    n_rows = len(X)
+    x_spread = X.max() - X.min()  # spread of the data
+
+    D = np.zeros((n, 1))
+    for i in range(0, n):
+        D[i] = i / (n - 1) * x_spread  # D is adjusted by the x_spread
+    h = (D[-1] - D[0]) / (n - 1)
+
+    A = np.zeros((n_rows, n))
+
+    x_sorted = np.sort(X)  # sort the data
+
+    for k in range(0, n_rows):
+        # First column
+        if (D[0] <= x_sorted[k]) and (x_sorted[k] <= D[1]):
+            A[k, 0] = np.exp(-(x_sorted[k] - D[0]) ** 2 / (2 * sigma ** 2))
+
+        # Last column (compare the sorted value, not the loop index)
+        elif (D[n - 2] <= x_sorted[k]) and (x_sorted[k] <= D[n - 1]):
+            A[k, n - 1] = np.exp(-(x_sorted[k] - D[n - 1]) ** 2 / (2 * sigma ** 2))
+
+        # All other columns
+        for j in range(1, n - 1):
+            if (D[j - 1] <= x_sorted[k]) and (x_sorted[k] <= D[j + 1]):
+                A[k, j] = np.exp(-(x_sorted[k] - D[j]) ** 2 / (2 * sigma ** 2))
+
+    return D, A
+
autofuzzts/partition/partition.py
ADDED
@@ -0,0 +1,110 @@
+import numpy as np
+import pandas as pd
+from typing import Union, Literal
+import warnings
+from sklearn.preprocessing import MinMaxScaler
+
+from autofuzzts.partition.fuzzy_clust_fun import (
+    fuzzy_partition_cosine,
+    fuzzy_partition_triangle,
+    fuzzy_partition_gauss,
+)
+
+class FuzzyPartition:
+    def __init__(self, fuzzy_function: Literal["cosine", "triangle", "gauss"], n_clusters: int, sigma: float, scaler: MinMaxScaler, verbosity: bool = False):
+        self.fuzzy_function = self._get_fuzzy_partition_func(fuzzy_function)
+        self.n_clusters = n_clusters
+        self.sigma = sigma
+        self.verbosity = verbosity
+        self.scaler = scaler
+
+        if scaler is None:
+            warnings.warn("Scaler must be provided for inverse transformation.")
+
+    def _get_fuzzy_partition_func(self, fuzzy_part_func: Union[str, None]):
+        if fuzzy_part_func == "cosine":
+            return fuzzy_partition_cosine
+        elif fuzzy_part_func == "triangle":
+            return fuzzy_partition_triangle
+        elif fuzzy_part_func == "gauss":
+            return fuzzy_partition_gauss
+        else:
+            return fuzzy_partition_cosine  # Default function
+
+    def fuzzy_partition(self, X: np.ndarray) -> pd.DataFrame:
+        """
+        Perform fuzzy partitioning on the target variable X.
+
+        Parameters:
+            X (np.ndarray): Input data to be partitioned.
+
+        Returns:
+            pd.DataFrame: DataFrame containing the partition results.
+        """
+        # Perform fuzzy partitioning using the selected function
+        if self.fuzzy_function.__name__ == "fuzzy_partition_gauss":
+            D, A = self.fuzzy_function(X=X, n=self.n_clusters, sigma=self.sigma)
+        else:
+            D, A = self.fuzzy_function(X=X, n=self.n_clusters)
+
+        center_points = np.array([round(i, 2) for i in D.flatten()])
+
+        if self.verbosity:
+            print("Cluster center points:", center_points)
+
+        # Unscaled center points
+        center_points_unscaled = self.scaler.inverse_transform(
+            center_points.reshape(-1, 1)
+        )
+        self.center_points_unscaled = center_points_unscaled.flatten()
+        if self.verbosity:
+            print("Cluster center points unscaled:", self.center_points_unscaled)
+
+        # Create a DataFrame for membership values
+        A_df = pd.DataFrame(A)
+        A_df.columns = ["set_" + str(i) for i in range(A_df.shape[1])]
+
+        # Prepare the fuzzy partition DataFrame
+        fp_df = A_df.copy()
+        fp_df.insert(0, "X_value", X)
+        fp_df["membership_value"] = fp_df.iloc[:, 1:].max(axis=1)
+        fp_df["cluster"] = fp_df.iloc[:, 1:].idxmax(axis=1)
+
+        # Initialize 'left' and 'right' columns
+        fp_df["left"] = 0
+        fp_df["right"] = 0
+
+        # Define sets for the left/right logic
+        set_min = "set_0"
+        set_max = "set_" + str(len(center_points) - 1)
+
+        # Set left and right for the min and max sets
+        fp_df.loc[fp_df["cluster"] == set_min, "right"] = 1
+        fp_df.loc[fp_df["cluster"] == set_max, "left"] = 1
+
+        fp_df["center_point"] = ""
+        fp_df.loc[fp_df["cluster"] == set_min, "center_point"] = 0
+        fp_df.loc[fp_df["cluster"] == set_max, "center_point"] = 1
+
+        # Logic for intermediate clusters
+        for i in range(1, len(center_points) - 1):
+            set_i = "set_" + str(i)
+            fp_df.loc[fp_df["cluster"] == set_i, "center_point"] = center_points[i]
+            fp_df.loc[
+                (fp_df["cluster"] == set_i) & (fp_df["X_value"] >= center_points[i]),
+                "right",
+            ] = 1
+            fp_df.loc[
+                (fp_df["cluster"] == set_i) & (fp_df["X_value"] < center_points[i]),
+                "left",
+            ] = 1
+
+        # Ensure membership values are non-negative
+        fp_df.loc[fp_df["membership_value"] < 0, "membership_value"] = 0
+
+        # Keep only the relevant columns
+        fp_df = fp_df.loc[:, ["X_value", "membership_value", "cluster", "left"]]
+
+        return fp_df, center_points, center_points_unscaled.flatten()
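Hypothetical usage of `FuzzyPartition`, assuming the inputs were already scaled to [0, 1] with the same `MinMaxScaler` that the class receives for inverse transformation; the random data and cluster count are illustrative:

```python
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from autofuzzts.partition.partition import FuzzyPartition

rng = np.random.default_rng(0)
raw = rng.normal(size=(100, 1))

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(raw).flatten()  # values in [0, 1]

fp = FuzzyPartition(fuzzy_function="triangle", n_clusters=4,
                    sigma=1.0, scaler=scaler, verbosity=True)
fp_df, centers, centers_unscaled = fp.fuzzy_partition(X_scaled)

print(fp_df.columns.tolist())  # ['X_value', 'membership_value', 'cluster', 'left']
print(centers)                 # 4 rounded centers spanning [0, 1]
```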
autofuzzts/partition/visualize_partition.py
ADDED
@@ -0,0 +1,32 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+def visualize_partition(fp_df, center_points):
+    plt.figure(figsize=(6, 3))
+
+    # Scatter plot of membership values, coloured by cluster
+    plt.scatter(
+        fp_df["X_value"],
+        fp_df["membership_value"],
+        c=fp_df["cluster"].astype("category").cat.codes,
+        cmap="viridis",
+        s=50,
+    )
+    plt.xlabel("X")
+    plt.ylabel("Membership Value")
+    plt.title("Fuzzy Partition")
+
+    # Mark the center points with 'x' markers at y=0.5
+    plt.plot(center_points, np.ones_like(center_points) * 0.5, "x", markersize=10)
+
+    # Add labels for center points with a slight vertical offset
+    for i, txt in enumerate(center_points):
+        plt.annotate(
+            txt,
+            (center_points[i], 0.5 + 0.015),
+            horizontalalignment="center",
+            fontsize=8,
+        )
+
+    plt.show()
autofuzzts/pipeline.py
ADDED
@@ -0,0 +1,383 @@
+# pipeline.py
+import pandas as pd
+from typing import Dict, Literal
+import optuna
+import numpy as np
+
+from autofuzzts.config import get_config
+from autofuzzts.data import data_loader
+from autofuzzts.data_validation.validate import validate_and_clean_input
+from autofuzzts.partition.partition import FuzzyPartition
+from autofuzzts.preprocess.preprocess import preprocess_data, preprocess_data_val
+from autofuzzts.preprocess.prep_for_model import prepare_for_model, prepare_for_model_val_set
+from autofuzzts.models.fuzzy_classifier import FuzzyPipelineModel
+
+from sklearn.model_selection import ParameterGrid
+from sklearn.calibration import CalibratedClassifierCV
+
+## Import error metrics (RMSE, MAE, MSE)
+from sklearn.metrics import root_mean_squared_error, mean_absolute_error, mean_squared_error
+
+# Example custom configuration
+custom_config = {
+    "n_clusters": 5,
+    "verbosity": True,
+}
+
+# Retrieve the final configuration
+selected_config = get_config(custom_config)
+
+
+def run_pipeline(dataset_name: str, config: dict = selected_config):
+    # Load data
+    data = data_loader.load_sample_data(dataset_name)
+    print(data.head(5))
+    print('Evaluated configuration is:')
+    print(config)
+
+
+def train_val_pipeline(train_set: pd.DataFrame, val_set: pd.DataFrame, config: Dict = selected_config, metric: Literal['rmse', 'mse', 'mae'] = 'rmse',
+                       diff_type: Literal['perc', 'abs'] = 'perc', covariates: list[str] = None) -> float:
+    train_set = validate_and_clean_input(train_set, covariates)
+    val_set = validate_and_clean_input(val_set, covariates)
+
+    print('train set length:', len(train_set))
+
+    if covariates:
+        train_covariates = train_set[covariates].copy()
+        val_covariates = val_set[covariates].copy()
+
+    train_set_preprocessed, scaler_train = preprocess_data(train_set, diff_type)
+    val_set_preprocessed = preprocess_data_val(df=val_set, df_train=train_set, diff_type=diff_type, scaler=scaler_train)
+
+    fp = FuzzyPartition(fuzzy_function=config['fuzzy_part_func'], n_clusters=config['n_clusters'], sigma=config['sigma'], scaler=scaler_train, verbosity=config['verbosity'])
+
+    # Prepare train and validation fuzzy partitions
+    X_training = train_set_preprocessed['diff_scaled'].values
+    X_validation = val_set_preprocessed['diff_scaled'].values
+
+    train_fuzzy_partition, _, _ = fp.fuzzy_partition(X_training)
+    val_fuzzy_partition, _, center_points_unscaled_test_val = fp.fuzzy_partition(X_validation)
+
+    X_train, y_train = prepare_for_model(train_fuzzy_partition.copy(), config['number_of_lags'])
+    X_val, y_val = prepare_for_model_val_set(df_val_fp=val_fuzzy_partition.copy(), df_train_fp=train_fuzzy_partition.copy(), n_lags=config['number_of_lags'])
+
+    if covariates:
+        X_train = pd.concat([X_train, train_covariates], axis=1)
+        X_val = pd.concat([X_val, val_covariates], axis=1)
+
+    model = FuzzyPipelineModel(n_clusters=config['n_clusters'], number_of_lags=config['number_of_lags'], verbosity=config['verbosity'])
+
+    model.fit(X_train, y_train, model_type='xgb')
+
+    pred_cluster = model.predict(X_val)
+
+    ## Convert the predicted clusters to crisp numbers using the cluster center points
+    y_val_pred_center_point = [center_points_unscaled_test_val[i] for i in pred_cluster]
+
+    ## Recalculate the predicted differences back to actual values
+    y_val_pred = [None] * len(val_set)
+
+    # The first prediction uses the last known value from the train set; each
+    # subsequent one is based on the previous actual value in the validation set
+    last_train_value = train_set['Y'].iloc[-1]
+
+    if diff_type == 'perc':
+        y_val_pred[0] = last_train_value * (1 + y_val_pred_center_point[0])
+        for i in range(1, len(val_set)):
+            prev_Y = val_set['Y'].iloc[i - 1]
+            perc_change = y_val_pred_center_point[i]
+            y_val_pred[i] = prev_Y * (1 + perc_change)
+
+    elif diff_type == 'abs':
+        y_val_pred[0] = last_train_value + y_val_pred_center_point[0]
+        for i in range(1, len(val_set)):
+            prev_Y = val_set['Y'].iloc[i - 1]
+            y_val_pred[i] = prev_Y + y_val_pred_center_point[i]
+
+    if metric == 'rmse':
+        metric_value = root_mean_squared_error(val_set['Y'], y_val_pred)
+    elif metric == 'mse':
+        metric_value = mean_squared_error(val_set['Y'], y_val_pred)
+    elif metric == 'mae':
+        metric_value = mean_absolute_error(val_set['Y'], y_val_pred)
+    else:
+        raise ValueError(f"Invalid metric {metric}. Please choose one of 'rmse', 'mse', 'mae'.")
+
+    return metric_value
+
+
+def train_model(dataset: pd.DataFrame, config: Dict, model_type: Literal['xgb', 'mlp', 'tpot'] = 'xgb'):
+    '''
+    Train a model on the dataset provided.
+
+    Parameters:
+        dataset: pd.DataFrame
+            The dataset to train the model on.
+        config: dict
+            The configuration dictionary for the model.
+        model_type: str
+            The type of model to train. Default is 'xgb'.
+    '''
+    config = get_config(config)
+
+    df = validate_and_clean_input(dataset)
+
+    df_preprocessed, scaler_train = preprocess_data(df, diff_type='perc')
+
+    fp = FuzzyPartition(fuzzy_function=config['fuzzy_part_func'], n_clusters=config['n_clusters'], sigma=config['sigma'], scaler=scaler_train, verbosity=config['verbosity'])
+
+    X_training = df_preprocessed['diff_scaled'].values
+
+    train_fuzzy_partition, _, _ = fp.fuzzy_partition(X_training)
+
+    X_train, y_train = prepare_for_model(train_fuzzy_partition.copy(), config['number_of_lags'])
+
+    model_train = FuzzyPipelineModel(n_clusters=config['n_clusters'], number_of_lags=config['number_of_lags'], verbosity=config['verbosity'])
+
+    model_train.fit(X_train, y_train, model_type=model_type)
+
+    return model_train, scaler_train
+
+
+def tune_hyperparameters_bayes(train_set: pd.DataFrame, val_set: pd.DataFrame, n_trials: int = 315, metric: Literal['rmse', 'mse', 'mae'] = 'rmse',
+                               diff_type: Literal['perc', 'abs'] = 'perc', covariates: list[str] = None):
+    def objective(trial):
+        # Define the search space; the 'fuzzy_part_func' values must match the
+        # names accepted by FuzzyPartition ('triangle', 'cosine', 'gauss')
+        config = {
+            'n_clusters': trial.suggest_int('n_clusters', 4, 40),  # Number of fuzzy sets
+            'number_of_lags': trial.suggest_int('number_of_lags', 1, 10),  # Number of lags
+            'fuzzy_part_func': trial.suggest_categorical('fuzzy_part_func', ['triangle', 'cosine', 'gauss']),  # Partition function type
+        }
+
+        if config['fuzzy_part_func'] == 'gauss':
+            config['sigma'] = trial.suggest_float('sigma', 0.1, 4, log=True)
+        else:
+            config['sigma'] = None
+
+        selected_config = get_config(config)
+
+        # Use train_val_pipeline to evaluate this configuration
+        metric_value = train_val_pipeline(train_set, val_set, selected_config, metric, diff_type, covariates=covariates)
+        return metric_value
+
+    # Create and optimize the Optuna study
+    study = optuna.create_study(direction="minimize")
+    study.optimize(objective, n_trials=n_trials)
+
+    # Extract the best configuration and score
+    best_config = study.best_params
+    best_metric_value = study.best_value
+
+    print(f"Best Config: {best_config}")
+    print(f"Best {metric.upper()}: {best_metric_value}")
+    return best_config, best_metric_value
+
+
+def tune_hyperparameters_bayes_Henon(train_set: pd.DataFrame, val_set: pd.DataFrame, n_trials: int = 315, metric: Literal['rmse', 'mse', 'mae'] = 'rmse', diff_type: Literal['perc', 'abs'] = 'perc'):
+    def objective(trial):
+        config = {
+            'n_clusters': trial.suggest_int('n_clusters', 2, 29),  # Number of fuzzy sets
+            'number_of_lags': trial.suggest_int('number_of_lags', 2, 5),  # Number of lags
+            'fuzzy_part_func': trial.suggest_categorical('fuzzy_part_func', ['triangle', 'cosine', 'gauss']),
+        }
+
+        if config['fuzzy_part_func'] == 'gauss':
+            config['sigma'] = trial.suggest_float('sigma', 0.1, 4, log=True)
+        else:
+            config['sigma'] = None
+
+        selected_config = get_config(config)
+
+        # Use train_val_pipeline to evaluate this configuration
+        metric_value = train_val_pipeline(train_set, val_set, selected_config, metric, diff_type)
+        return metric_value
+
+    # Create and optimize the Optuna study
+    study = optuna.create_study(direction="minimize")
+    study.optimize(objective, n_trials=n_trials)
+
+    # Extract the best configuration and score
+    best_config = study.best_params
+    best_metric_value = study.best_value
+
+    print(f"Best Config: {best_config}")
+    print(f"Best {metric.upper()}: {best_metric_value}")
+    return best_config, best_metric_value
+
+
+def tune_hyperparameters_grid(train_set: pd.DataFrame, val_set: pd.DataFrame, n_trials: int = 315, metric: Literal['rmse', 'mse', 'mae'] = 'rmse', diff_type: Literal['perc', 'abs'] = 'perc'):
+
+    # Define the grid for the Gaussian fuzzy function (includes 'sigma');
+    # the 'fuzzy_part_func' values must match the names accepted by FuzzyPartition
+    grid_gauss = {
+        'number_of_lags': [1, 3, 5, 7, 9],
+        'n_clusters': [4, 6, 8, 10, 12, 14, 16, 18, 20],
+        'sigma': [0.1, 0.5, 1, 5, 9],
+        'fuzzy_part_func': ['gauss']
+    }
+
+    # Define the grid for non-Gaussian fuzzy functions (excludes 'sigma')
+    grid_non_gauss = {
+        'number_of_lags': [1, 3, 5, 7, 9],
+        'n_clusters': [4, 6, 8, 10, 12, 14, 16, 18, 20],
+        'sigma': [None],  # Set sigma to None for non-Gaussian functions
+        'fuzzy_part_func': ['cosine', 'triangle']
+    }
+
+    # Combine the grids
+    grid_gauss = list(ParameterGrid(grid_gauss))
+    grid_non_gauss = list(ParameterGrid(grid_non_gauss))
+    combined_grid = grid_gauss + grid_non_gauss
+
+    ## Run the grid search
+    best_metric_value = float("inf")
+    best_config = None
+    num_evaluations = 0
+
+    for config in combined_grid:
+        selected_config = get_config(config)
+        # Count the configuration being evaluated
+        num_evaluations += 1
+
+        if num_evaluations >= n_trials:
+            break
+
+        ## Print progress every 20 evaluations
+        if num_evaluations % 20 == 0:
+            print(f"Number of evaluations done: {num_evaluations}")
+
+        # Evaluate the config on the validation set using train_val_pipeline
+        metric_value = train_val_pipeline(train_set, val_set, selected_config, metric, diff_type)
+
+        # Update the best config if this one is better according to the selected metric
+        if metric_value < best_metric_value:
+            best_metric_value = metric_value
+            best_config = config
+
+    return best_config, best_metric_value, num_evaluations
+
+
+def train_calib_pred_test(train_set: pd.DataFrame, test_set: pd.DataFrame,
+                          config: Dict,
+                          model_type: Literal['xgb', 'mlp', 'tpot'] = 'xgb', number_cv_calib=5, diff_type: Literal['perc', 'abs'] = 'perc',
+                          covariates: list[str] = None, exclude_bool: bool = False) -> tuple:
+    '''
+    Train a model on the train set, calibrate it using the calibration method
+    provided, and predict on the test set.
+    '''
+    config = get_config(config)
+
+    # Step 1: Validate and preprocess the input data
+    train_set = validate_and_clean_input(train_set, covariates=covariates)
+    test_set = validate_and_clean_input(test_set, covariates=covariates)
+
+    train_set_preprocessed, scaler_train = preprocess_data(train_set, diff_type=diff_type)
+    test_set_preprocessed = preprocess_data_val(df=test_set, df_train=train_set, diff_type=diff_type, scaler=scaler_train)
+
+    # Step 2: Fuzzy partition for the train and test sets
+    fp = FuzzyPartition(fuzzy_function=config['fuzzy_part_func'],
+                        n_clusters=config['n_clusters'],
+                        sigma=config['sigma'],
+                        scaler=scaler_train,
+                        verbosity=config['verbosity'])
+
+    X_training = train_set_preprocessed['diff_scaled'].values
+    X_test = test_set_preprocessed['diff_scaled'].values
+
+    train_fuzzy_partition, _, _ = fp.fuzzy_partition(X_training)
+    test_fuzzy_partition, _, center_points_unscaled_test = fp.fuzzy_partition(X_test)
+
+    if exclude_bool:
+        ## Remove the 'left' column from both fuzzy partitions
+        train_fuzzy_partition = train_fuzzy_partition.drop(columns=['left'])
+        test_fuzzy_partition = test_fuzzy_partition.drop(columns=['left'])
+
+    train_fuzzy_partition.to_csv('train_fuzzy_partition.csv')
+    test_fuzzy_partition.to_csv('test_fuzzy_partition.csv')
+
+    print('center_points_unscaled_test:', center_points_unscaled_test)
+
+    # Prepare data for model training and testing
+    X_train, y_train = prepare_for_model(train_fuzzy_partition.copy(), config['number_of_lags'])
+    X_test_final, _ = prepare_for_model_val_set(df_val_fp=test_fuzzy_partition.copy(),
+                                                df_train_fp=train_fuzzy_partition.copy(),
+                                                n_lags=config['number_of_lags'])
+
+    if covariates:
+        train_covariates = train_set[covariates].copy()
+        test_covariates = test_set[covariates].copy()
+
+        X_train = pd.concat([X_train, train_covariates], axis=1)
+        X_test_final = pd.concat([X_test_final, test_covariates], axis=1)
+
+    # Step 3: Train the model on the train set
+    model = FuzzyPipelineModel(n_clusters=config['n_clusters'],
+                               number_of_lags=config['number_of_lags'],
+                               verbosity=config['verbosity'])
+
+    model.fit(X_train, y_train, model_type=model_type)
+
+    try:
+        # Step 4: Calibrate the model using CalibratedClassifierCV
+        model.calibrate(X_train, y_train, method='sigmoid', cv=number_cv_calib)
+    except Exception:
+        pass  # Fall back to the uncalibrated model if calibration fails
+
+    # Step 5: Make predictions and evaluate on the test set
+    y_test_pred_cluster = model.predict(X_test_final)
+
+    ## Convert the predicted clusters to crisp numbers using the cluster center points
+    y_test_pred_center_point = [center_points_unscaled_test[i] for i in y_test_pred_cluster]
+
+    ## Recalculate the predicted differences back to actual values
+    y_test_pred = [None] * len(test_set)
+
+    # The first prediction uses the last known value from the train set; each
+    # subsequent one is based on the previous actual value in the test set
+    last_train_value = train_set['Y'].iloc[-1]
+
+    if diff_type == 'perc':
+        y_test_pred[0] = last_train_value * (1 + y_test_pred_center_point[0])
+        for i in range(1, len(test_set)):
+            prev_Y = test_set['Y'].iloc[i - 1]
+            perc_change = y_test_pred_center_point[i]
+            y_test_pred[i] = prev_Y * (1 + perc_change)
+
+    elif diff_type == 'abs':
+        y_test_pred[0] = last_train_value + y_test_pred_center_point[0]
+        for i in range(1, len(test_set)):
+            prev_Y = test_set['Y'].iloc[i - 1]
+            y_test_pred[i] = prev_Y + y_test_pred_center_point[i]
+
+    return y_test_pred_cluster, y_test_pred_center_point, y_test_pred
+
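An end-to-end sketch of the two main entry points above; the random-walk series, split sizes, and trial count are invented for illustration and real runs may need tuning:

```python
import numpy as np
import pandas as pd
from autofuzzts.pipeline import train_val_pipeline, tune_hyperparameters_bayes

# Hypothetical univariate series, split chronologically into train/validation
y = 100 + np.cumsum(np.random.default_rng(0).normal(0.1, 1.0, size=300))
df = pd.DataFrame({"Y": y})
train_set, val_set = df.iloc[:240].copy(), df.iloc[240:].copy()

# Score a single configuration on the validation set
rmse = train_val_pipeline(train_set, val_set, metric='rmse', diff_type='perc')
print("validation RMSE:", rmse)

# Or let Optuna search the configuration space (small n_trials for a demo)
best_config, best_rmse = tune_hyperparameters_bayes(train_set, val_set, n_trials=10)
```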
autofuzzts/preprocess/__init__.py
ADDED
@@ -0,0 +1 @@
+# __init__.py
autofuzzts/preprocess/prep_for_model.py
ADDED
@@ -0,0 +1,70 @@
+import pandas as pd
+import warnings
+from sklearn.preprocessing import LabelEncoder
+
+def prepare_for_model(df: pd.DataFrame, number_of_lags: int):
+    """
+    Prepare clustering data for modeling.
+
+    Parameters:
+    - df (pd.DataFrame): The input DataFrame containing clustering data.
+    - number_of_lags (int): The number of lag features to create.
+
+    Returns:
+    - X_train (pd.DataFrame): Features for training the model.
+    - y_train (np.ndarray): Target variable for training the model.
+    """
+
+    # Prepare the 'cluster' column
+    df.loc[:, "cluster"] = df["cluster"].str.replace("set_", "").astype(int).copy()
+
+    # Create lagged features
+    for i in range(1, number_of_lags + 1):
+        df.loc[:, "cluster_lag" + str(i)] = df["cluster"].shift(i).copy()
+        df.loc[:, "membership_value_lag" + str(i)] = df["membership_value"].shift(i).copy()
+        df.loc[:, "left_lag" + str(i)] = df["left"].shift(i).copy()
+
+    # Reset the warning filter
+    warnings.filterwarnings("default", category=pd.errors.SettingWithCopyWarning)
+
+    # Prepare the model DataFrame
+    df_model = df.copy()
+    df_model.drop(columns=["membership_value", "left"], inplace=True)
+    df_model.rename(columns={"X_value": "Y"}, inplace=True)
+
+    numeric_cols = df_model.select_dtypes(include=['float64', 'int64']).columns
+    df_model[numeric_cols] = df_model[numeric_cols].fillna(0)
+
+    # Separate target and features
+    y_train = df_model["cluster"]
+    X_train = df_model.drop(columns=["Y", "cluster"])
+
+    # Encode categorical columns
+    label_encoder = LabelEncoder()
+    encoded_cols = []
+
+    # Loop through columns and encode those that start with 'cluster_'
+    for col in X_train.columns:
+        if col.startswith("cluster_"):
+            X_train[col] = label_encoder.fit_transform(X_train[col])
+            encoded_cols.append(col)
+
+    # Label encode y_train
+    y_train = label_encoder.fit_transform(y_train)
+
+    return X_train, y_train
+
+def prepare_for_model_val_set(df_val_fp: pd.DataFrame, df_train_fp: pd.DataFrame, n_lags: int):
+    '''
+    Prepare the validation set. Attach rows from the end of the train set
+    (based on the number of lags) to the beginning of the validation set so
+    the lag features are defined, then remove the attached rows at the end.
+    '''
+    df_concat = pd.concat([df_train_fp.tail(n_lags), df_val_fp], axis=0).reset_index(drop=True)
+
+    X_val, y_val = prepare_for_model(df=df_concat, number_of_lags=n_lags)
+
+    X_val = X_val.iloc[n_lags:]
+    y_val = y_val[n_lags:]
+
+    return X_val, y_val
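For illustration, a hand-made frame in the shape returned by `FuzzyPartition.fuzzy_partition`, showing the lag layout `prepare_for_model` produces; the values are arbitrary:

```python
import pandas as pd
from autofuzzts.preprocess.prep_for_model import prepare_for_model

fp_df = pd.DataFrame({
    "X_value":          [0.1, 0.4, 0.9, 0.5, 0.2],
    "membership_value": [0.8, 0.9, 0.7, 1.0, 0.6],
    "cluster":          ["set_0", "set_1", "set_2", "set_1", "set_0"],
    "left":             [0, 1, 0, 0, 0],
})

X_train, y_train = prepare_for_model(fp_df, number_of_lags=2)
print(X_train.columns.tolist())
# ['cluster_lag1', 'membership_value_lag1', 'left_lag1',
#  'cluster_lag2', 'membership_value_lag2', 'left_lag2']
print(y_train)  # label-encoded cluster indices
```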
autofuzzts/preprocess/preprocess.py
ADDED
@@ -0,0 +1,63 @@
+import pandas as pd
+import numpy as np
+from sklearn.preprocessing import MinMaxScaler
+from typing import Literal
+
+
+def preprocess_data(df: pd.DataFrame, diff_type: Literal['perc', 'abs'] = 'perc', scaler: MinMaxScaler = None):
+    """
+    Prepares time series data by calculating differences, clipping outliers, and scaling.
+
+    Parameters:
+        df (pd.DataFrame): Input DataFrame with a single column named 'Y' containing the time series data.
+        diff_type (str): Type of difference to calculate ('perc' for percentage, 'abs' for absolute). Default is 'perc'.
+        scaler (MinMaxScaler, optional): A previously fitted scaler; if None, a new one is fitted.
+
+    Returns:
+        pd.DataFrame: The preprocessed data, scaled and ready for further processing.
+        MinMaxScaler: The scaler used for scaling, useful for inverse transformation.
+    """
+
+    # Step 1: Calculate the difference based on the user's choice
+    if diff_type == 'perc':
+        df['diff'] = df['Y'].pct_change()  # Percentage difference
+    elif diff_type == 'abs':
+        df['diff'] = df['Y'].diff()  # Absolute difference
+    else:
+        raise ValueError("Invalid diff_type. Choose 'perc' for percentage or 'abs' for absolute.")
+
+    ## Replace infinite values with 1 or -1
+    df['diff'] = np.where(df['diff'] == np.inf, 1, df['diff'])
+    df['diff'] = np.where(df['diff'] == -np.inf, -1, df['diff'])
+
+    ## Clip 'diff' to the 0.01 and 0.99 quantiles
+    df['diff'] = np.where(df['diff'] < df['diff'].quantile(0.01), df['diff'].quantile(0.01), df['diff'])
+    df['diff'] = np.where(df['diff'] > df['diff'].quantile(0.99), df['diff'].quantile(0.99), df['diff'])
+
+    ## Replace NaNs with 0
+    df['diff'] = df['diff'].fillna(0)
+
+    # Step 2: Scale only the 'diff' column
+    df_scaled = df.copy()
+    if scaler is None:  # If no scaler is provided, fit a new one
+        scaler = MinMaxScaler()
+        df_scaled['diff_scaled'] = scaler.fit_transform(df[['diff']])
+    else:  # Reuse the provided scaler without refitting it
+        df_scaled['diff_scaled'] = scaler.transform(df[['diff']])
+
+    return df_scaled, scaler  # Return scaled data and the scaler for possible inverse transforms
+
+
+def preprocess_data_val(df: pd.DataFrame, df_train: pd.DataFrame, diff_type: Literal['perc', 'abs'] = 'perc', scaler: MinMaxScaler = None):
+    '''
+    Attach the last row of the train set to the beginning of the validation
+    set, preprocess the combined data, then remove the attached row.
+    '''
+    df_concat = pd.concat([df_train.tail(1), df], axis=0)
+    df_preprocessed, scaler = preprocess_data(df=df_concat, diff_type=diff_type, scaler=scaler)
+    df_preprocessed = df_preprocessed.iloc[1:]
+    return df_preprocessed
+
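A short sketch of the train/validation preprocessing contract above; the validation call reuses the fitted scaler and borrows the last train row so that its first difference is defined (toy numbers throughout):

```python
import pandas as pd
from autofuzzts.preprocess.preprocess import preprocess_data, preprocess_data_val

train = pd.DataFrame({"Y": [100.0, 102.0, 101.0, 105.0]})
val = pd.DataFrame({"Y": [106.0, 104.0]})

train_prep, scaler = preprocess_data(train, diff_type='perc')
print(train_prep[["Y", "diff", "diff_scaled"]])
# row 0 has diff 0 (no previous value); the rest are percentage changes

# Validation reuses the fitted scaler; the attached train row is dropped again
val_prep = preprocess_data_val(df=val, df_train=train, diff_type='perc', scaler=scaler)
print(val_prep["diff_scaled"].tolist())
```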
autofuzzts/utils.py
ADDED
@@ -0,0 +1 @@
+# utils.py
autofuzzts-0.1.0.dist-info/METADATA
ADDED
@@ -0,0 +1,41 @@
+Metadata-Version: 2.4
+Name: autofuzzts
+Version: 0.1.0
+Summary: Automated fuzzy time series forecasting library
+Home-page: https://github.com/jtimko16/AutoFuzzTS
+Author: Jan Timko
+Author-email: Jan Timko <jantimko16@gmail.com>
+License: MIT
+Project-URL: Homepage, https://github.com/jtimko16/AutoFuzzTS
+Project-URL: Bug Tracker, https://github.com/jtimko16/AutoFuzzTS/issues
+Requires-Python: >=3.11
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: numpy>=1.26.0
+Requires-Dist: pandas>=2.2.0
+Requires-Dist: scikit-learn>=1.5.0
+Requires-Dist: scipy>=1.15.0
+Requires-Dist: xgboost>=3.0.0
+Requires-Dist: lightgbm>=4.6.0
+Requires-Dist: tpot>=1.0.0
+Requires-Dist: optuna>=4.3.0
+Requires-Dist: matplotlib>=3.10.0
+Requires-Dist: seaborn>=0.13.0
+Requires-Dist: requests>=2.32.0
+Requires-Dist: PyYAML>=6.0.0
+Requires-Dist: joblib>=1.4.0
+Requires-Dist: tqdm>=4.67.0
+Dynamic: author
+Dynamic: home-page
+Dynamic: license-file
+Dynamic: requires-python
+
+# AutoFuzzTS
+
+Automated fuzzy time series forecasting library in Python.
+Build and evaluate time series models automatically using fuzzy logic and AutoML techniques.
+
+## Installation
+
+```bash
+pip install autofuzzts
autofuzzts-0.1.0.dist-info/RECORD
ADDED
@@ -0,0 +1,25 @@
+autofuzzts/__init__.py,sha256=2k_ZeqU7FvqZMFqGm-EYRiV98uxUxmiy5wXygvIobPU,13
+autofuzzts/config.py,sha256=rzwULHfKKtf5Rdpm8pk-zwuXrkKc0dckF-xIfz1UVcY,392
+autofuzzts/pipeline.py,sha256=3rre8nzxEtSQI2_G4STt-AqIACEDLczP0t4YMr0IMW8,15782
+autofuzzts/utils.py,sha256=lywC_KhHuYgjUmXjj-ay9vZYTKUSxFgWXY2q6EdWf9s,10
+autofuzzts/data/__init__.py,sha256=2k_ZeqU7FvqZMFqGm-EYRiV98uxUxmiy5wXygvIobPU,13
+autofuzzts/data/data_loader.py,sha256=VO8V9O3WgXffyktUMSmbGTiXWBJ2kgN5wLqgFgvkE6w,266
+autofuzzts/data_validation/__init__.py,sha256=2k_ZeqU7FvqZMFqGm-EYRiV98uxUxmiy5wXygvIobPU,13
+autofuzzts/data_validation/validate.py,sha256=ttK3nnvfTfxFF_GKyfxuU168oqmA6MEemB1dP06mL7g,1453
+autofuzzts/evaluation/__init__.py,sha256=2k_ZeqU7FvqZMFqGm-EYRiV98uxUxmiy5wXygvIobPU,13
+autofuzzts/models/__init__.py,sha256=2k_ZeqU7FvqZMFqGm-EYRiV98uxUxmiy5wXygvIobPU,13
+autofuzzts/models/fuzzy_classifier.py,sha256=mU0t91n-8mTJQs-_XDYbrix9oa6EQP_3UvGDCw-GmJY,3363
+autofuzzts/models/mlp_nas.py,sha256=OCFtrd47IhesAqtaHpBTOwKPdFly9yjl7O-25msXXGE,3048
+autofuzzts/partition/__init__.py,sha256=2k_ZeqU7FvqZMFqGm-EYRiV98uxUxmiy5wXygvIobPU,13
+autofuzzts/partition/fuzzy_clust_fun.py,sha256=NlpkI8s7N4ebdHWbGECsQZqk1Xf8v0c15cA27JEMJ-A,3097
+autofuzzts/partition/fuzzy_clust_fun_orig.py,sha256=JlXYw-MxiNAcIasYTyWiQHaHhJuY8h_BRy3jLo2efOA,3653
+autofuzzts/partition/partition.py,sha256=f5nTHjrJJYKjtzMFxsdfPL_CGBb12HOR0hkGi4L_WLY,4410
+autofuzzts/partition/visualize_partition.py,sha256=F31yovGfosqa-EmtuQdIIuF61XejHEGGdALfHHAtDu0,909
+autofuzzts/preprocess/__init__.py,sha256=2k_ZeqU7FvqZMFqGm-EYRiV98uxUxmiy5wXygvIobPU,13
+autofuzzts/preprocess/prep_for_model.py,sha256=mp19PGo_p8YWezSny__qKnuTREhAldSlxCzIutrisGk,2565
+autofuzzts/preprocess/preprocess.py,sha256=QZ0h4bZslwOrjTUyvPQaXDT_lBlnL8nKdp545Qy3xdk,2786
+autofuzzts-0.1.0.dist-info/licenses/LICENSE,sha256=bjnZy7iTBVYeRcAPI9NVlXeQGx62R13_t8xwoLq44Ms,1087
+autofuzzts-0.1.0.dist-info/METADATA,sha256=8Kg6RDBTKZBNKA8y3Lwp74Rw8lL9H1Hl1VU43Dm_9n0,1222
+autofuzzts-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+autofuzzts-0.1.0.dist-info/top_level.txt,sha256=YHgbVRUPg-x2WX7FKyJMUAeI9o46c8XFiR_eYKtXIxc,11
+autofuzzts-0.1.0.dist-info/RECORD,,
autofuzzts-0.1.0.dist-info/licenses/LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Jan Timko
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
autofuzzts-0.1.0.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+autofuzzts
|