autofuzzts 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autofuzzts/config.py +17 -17
- autofuzzts/data/data_loader.py +7 -7
- autofuzzts/data_validation/validate.py +41 -41
- autofuzzts/models/fuzzy_classifier.py +82 -82
- autofuzzts/models/mlp_nas.py +90 -90
- autofuzzts/partition/{fuzzy_clust_fun.py → fuzzy_part_fun.py} +107 -107
- autofuzzts/partition/partition.py +109 -109
- autofuzzts/partition/visualize_partition.py +32 -32
- autofuzzts/pipeline.py +469 -469
- autofuzzts/preprocess/prep_for_model.py +70 -70
- autofuzzts/preprocess/preprocess.py +62 -62
- {autofuzzts-0.1.2.dist-info → autofuzzts-0.1.3.dist-info}/METADATA +161 -146
- autofuzzts-0.1.3.dist-info/RECORD +23 -0
- {autofuzzts-0.1.2.dist-info → autofuzzts-0.1.3.dist-info}/WHEEL +1 -1
- {autofuzzts-0.1.2.dist-info → autofuzzts-0.1.3.dist-info}/licenses/LICENSE +21 -21
- autofuzzts/partition/fuzzy_clust_fun_orig.py +0 -129
- autofuzzts/utils.py +0 -1
- autofuzzts-0.1.2.dist-info/RECORD +0 -25
- {autofuzzts-0.1.2.dist-info → autofuzzts-0.1.3.dist-info}/top_level.txt +0 -0
autofuzzts/config.py
CHANGED
@@ -1,18 +1,18 @@
 # config.py
 
 DEFAULT_CONFIG = {
-    "
+    "n_fuzzy_sets": 3,
     "number_of_lags": 5,
     "plot_partition": False,
     "pred_column": "Y",
     "fuzzy_part_func": "triangle",
     "n_rows": 0,
     "sigma": 1.0,
     "verbosity": False,
 }
 
 def get_config(custom_config=None):
     config = DEFAULT_CONFIG.copy()
     if custom_config:
         config.update(custom_config)
     return config

(The removed side of the changed DEFAULT_CONFIG entry is truncated in the registry listing and is reproduced here as captured.)
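The override behaviour of the new key is easy to check through get_config. A minimal sketch, assuming the installed package exposes the module at the path shown in the file list:

from autofuzzts.config import get_config

config = get_config({"n_fuzzy_sets": 5, "number_of_lags": 3})
print(config["n_fuzzy_sets"])  # 5: the custom value overrides the new default of 3
print(config["sigma"])         # 1.0: unrelated defaults are preserved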
autofuzzts/data/data_loader.py
CHANGED
@@ -1,7 +1,7 @@
(Both sides of this hunk are identical in the listing, likely a whitespace-only change, so the content is shown once.)

import os
import pandas as pd

def load_sample_data(file_name):
    data_path = os.path.join(os.path.dirname(__file__), 'sample_datasets', file_name)
    print(f"Loading data from: {data_path}")  # Print the constructed path
    return pd.read_csv(data_path)
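A usage sketch for load_sample_data; the CSV names that actually ship under sample_datasets/ are not visible in this diff, so the file name below is a placeholder:

from autofuzzts.data.data_loader import load_sample_data

# "example.csv" is a placeholder; substitute a CSV that ships in
# autofuzzts/data/sample_datasets/.
df = load_sample_data("example.csv")
print(df.head())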
autofuzzts/data_validation/validate.py
CHANGED

@@ -1,41 +1,41 @@
(Both sides of this hunk are identical in the listing, so the content is shown once.)

import pandas as pd
import warnings


def validate_and_clean_input(df: pd.DataFrame, covariates: list[str] = None) -> pd.DataFrame:
    """
    Validates the input DataFrame, ensuring it contains at least one column.
    If the DataFrame contains multiple columns, all columns except the first
    are removed, and a warning is issued.

    Parameters:
        df (pd.DataFrame): The input DataFrame.

    Returns:
        pd.DataFrame: A DataFrame with only the first column retained.
    """
    if not isinstance(df, pd.DataFrame):
        raise ValueError("Input data must be a pandas DataFrame.")

    if df.shape[1] == 0:
        raise ValueError("Input DataFrame must contain at least one column.")

    # If there are multiple columns, keep only the first one and warn the user
    if df.shape[1] > 1:
        if covariates is None:
            warnings.warn("Input DataFrame has multiple columns. Only the first column will be used.")
            df = df[[df.columns[0]]]
        else:
            warnings.warn("Input DataFrame has multiple columns. Covariates will be used for modelling.")
            df = df[[df.columns[0]] + covariates]

    # Standardize column name to 'Y'
    df = df.rename(columns={df.columns[0]: 'Y'})

    # Convert all selected columns to numeric and fill NaNs with 0
    df = df.apply(pd.to_numeric, errors='coerce').fillna(0)

    return df
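The two warning branches behave differently depending on whether covariates are supplied. A small sketch with synthetic data (column names are illustrative):

import pandas as pd
from autofuzzts.data_validation.validate import validate_and_clean_input

raw = pd.DataFrame({"sales": [1.0, 2.0, None], "temp": [10, 11, 12]})

# No covariates: warns, keeps only the first column, renames it to 'Y'.
clean = validate_and_clean_input(raw)
print(clean.columns.tolist())      # ['Y']

# With covariates: warns, keeps 'Y' plus the named covariate columns.
clean_cov = validate_and_clean_input(raw, covariates=["temp"])
print(clean_cov.columns.tolist())  # ['Y', 'temp']
print(clean_cov["Y"].tolist())     # [1.0, 2.0, 0.0]; the NaN was filled with 0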
autofuzzts/models/fuzzy_classifier.py
CHANGED

@@ -1,83 +1,83 @@
 import xgboost as xgb
 from typing import Literal
 from tpot import TPOTClassifier
 from autofuzzts.models.mlp_nas import build_model
 from sklearn.calibration import CalibratedClassifierCV
 
 
 class FuzzyPipelineModel:
-    def __init__(self,
-        self.
+    def __init__(self, n_fuzzy_sets: int, number_of_lags: int, verbosity: bool = False):
+        self.n_fuzzy_sets = n_fuzzy_sets
         self.number_of_lags = number_of_lags
         self.verbosity = verbosity
         self.model = None  # Placeholder for the fitted model
 
     def fit(self, X_train, y_train, model_type: Literal['xgb', 'tpot', 'mlp'] = 'xgb', **kwargs):
         """
         Fit the model based on the specified model type and input parameters.
 
         Parameters:
         - X_train: Features for training.
         - y_train: Labels for training.
         - model_type: 'xgboost', 'mlp', or 'tpot'.
         - kwargs: Additional parameters for model fitting.
         """
         if model_type == 'xgb':
-            model = xgb.XGBClassifier(objective="multi:softmax", num_class=self.
+            model = xgb.XGBClassifier(objective="multi:softmax", num_class=self.n_fuzzy_sets, **kwargs)
             self.model = model.fit(X_train, y_train)
 
         elif model_type == 'tpot':
             tpot = TPOTClassifier(
                 generations=kwargs.get('generations', 5),
                 population_size=kwargs.get('population_size', 10),
                 random_state=kwargs.get('random_state', 42),
                 max_time_mins=kwargs.get('max_time_mins', 2),
             )
             tpot.fit(X_train, y_train)
             self.model = tpot.fitted_pipeline_
 
         elif model_type == 'mlp':
             mlp = build_model(X_train, y_train, **kwargs)  # Adjust as necessary
             self.model = mlp.fit(X_train, y_train)
 
         else:
             raise ValueError("Invalid model type. Choose 'xgb', 'mlp', or 'tpot'.")
 
         return self.model
 
     def calibrate(self, X_train, y_train, method='sigmoid', cv=5):
         """
         Calibrate the fitted model using CalibratedClassifierCV.
 
         Parameters:
         - X_train: Features for training (for calibration).
         - y_train: Labels for training (for calibration).
         - method: Calibration method ('sigmoid' or 'isotonic').
         - cv: Cross-validation splitting strategy.
         """
         if self.model is None:
             raise ValueError("Model is not fitted yet. Call 'fit' first.")
 
         # Ensure the model supports predict_proba
         if not hasattr(self.model, "predict_proba"):
             raise AttributeError("The fitted model does not support predict_proba.")
 
         # Initialize calibrated model
         calibrated_model = CalibratedClassifierCV(self.model, method=method, cv=cv)
         calibrated_model.fit(X_train, y_train)
 
         # Replace the model with the calibrated model
         self.model = calibrated_model
 
         return self.model
     def predict(self, X_test):
         """Make predictions using the fitted model."""
         if self.model is None:
             raise ValueError("Model is not fitted yet. Call 'fit_model' first.")
         return self.model.predict(X_test)
 
     def predict_proba(self, X_test):
         """Make predictions using the fitted model."""
         if self.model is None:
             raise ValueError("Model is not fitted yet. Call 'fit_model' first.")
         return self.model.predict_proba(X_test)

(The removed lines of __init__ and of the XGBClassifier call are truncated in the registry listing and are reproduced here as captured.)
autofuzzts/models/mlp_nas.py
CHANGED
@@ -1,90 +1,90 @@
(Both sides of this hunk are identical in the listing, likely a whitespace-only change, so the content is shown once.)

from sklearn.neural_network import MLPClassifier  # Assuming you use sklearn's MLP
from sklearn.model_selection import train_test_split
import optuna


def _objective(trial, X, y):
    """
    This is the objective function used for hyperparameter tuning
    (internal function).
    """
    # ... Same code as before inside objective function ...

    # Split data
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Define and train the model with suggested hyperparameters
    clf = MLPClassifier(
        hidden_layer_sizes=tuple(
            trial.suggest_int(f"n_units_{i}", 10, 100)
            for i in range(trial.suggest_int("n_layers", 1, 4))
        ),
        activation=trial.suggest_categorical("activation", ["relu", "tanh"]),
        solver="adam",  # Using only 'adam' solver
        alpha=trial.suggest_float("alpha", 1e-5, 1e-1, log=True),
        learning_rate_init=trial.suggest_float("learning_rate_init", 1e-4, 1e-2, log=True),
        max_iter=200,
        random_state=42,
        early_stopping=True,
        n_iter_no_change=10,
    )
    clf.fit(x_train, y_train)

    # Evaluate the model
    return clf.score(x_test, y_test)

def build_model(X, y, n_trials=100):
    """
    This function performs hyperparameter tuning, builds, trains, and
    returns a fitted MLP classifier model.

    Args:
        X: Training data features.
        y: Training data labels.
        n_trials: Number of trials for hyperparameter tuning (default: 100).

    Returns:
        A fitted MLPClassifier model.
    """
    # Define the neural network structure search space
    study = optuna.create_study(direction="maximize")
    study.optimize(lambda trial: _objective(trial, X, y), n_trials=n_trials)

    # Print best parameters and best score
    print(f"Best parameters: {study.best_params}")
    print(f"Best score: {study.best_value}")

    # Extract best hyperparameters
    n_layers = study.best_params["n_layers"]
    hidden_layer_sizes = tuple(
        [study.best_params[f"n_units_{i}"] for i in range(n_layers)]
    )
    activation = study.best_params["activation"]
    alpha = study.best_params["alpha"]
    learning_rate_init = study.best_params["learning_rate_init"]

    # Print model architecture
    print("Model Architecture:")
    print(f"\tNumber of Layers: {n_layers}")
    print(f"\tHidden Layer Sizes: {hidden_layer_sizes}")
    print(f"\tActivation Function: {activation}")
    print(f"\tL2 Penalty (alpha): {alpha}")
    print(f"\tInitial Learning Rate: {learning_rate_init}")

    # Create the model with best hyperparameters
    model = MLPClassifier(
        hidden_layer_sizes=hidden_layer_sizes,
        activation=activation,
        solver="adam",
        alpha=alpha,
        learning_rate_init=learning_rate_init,
        max_iter=200,
        random_state=42,
        early_stopping=True,
        n_iter_no_change=10,
    )

    return model
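One behaviour worth noting in build_model: despite its docstring, the function returns the best-configured MLPClassifier without fitting it, so callers fit it themselves (as FuzzyPipelineModel.fit does for the 'mlp' branch). A quick smoke-test sketch; the dataset and trial count are illustrative:

from sklearn.datasets import make_classification
from autofuzzts.models.mlp_nas import build_model

X, y = make_classification(n_samples=300, n_features=5, n_informative=4,
                           n_redundant=0, n_classes=3, random_state=0)

model = build_model(X, y, n_trials=10)  # a small n_trials keeps the search fast
model.fit(X, y)                         # build_model configures but does not fit
print(model.score(X, y))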