segmentae 1.5.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- segmentae/__init__.py +83 -0
- segmentae/anomaly_detection.py +20 -0
- segmentae/autoencoders/__init__.py +16 -0
- segmentae/autoencoders/batch_norm.py +208 -0
- segmentae/autoencoders/dense.py +211 -0
- segmentae/autoencoders/ensemble.py +219 -0
- segmentae/clusters/__init__.py +18 -0
- segmentae/clusters/clustering.py +171 -0
- segmentae/clusters/models.py +438 -0
- segmentae/clusters/registry.py +75 -0
- segmentae/core/__init__.py +65 -0
- segmentae/core/base.py +108 -0
- segmentae/core/constants.py +91 -0
- segmentae/core/exceptions.py +60 -0
- segmentae/core/types.py +55 -0
- segmentae/data_sources/__init__.py +3 -0
- segmentae/data_sources/examples.py +198 -0
- segmentae/metrics/__init__.py +6 -0
- segmentae/metrics/performance_metrics.py +119 -0
- segmentae/optimization/__init__.py +6 -0
- segmentae/optimization/optimizer.py +375 -0
- segmentae/pipeline/__init__.py +21 -0
- segmentae/pipeline/reconstruction.py +214 -0
- segmentae/pipeline/segmentae.py +562 -0
- segmentae/processing/__init__.py +21 -0
- segmentae/processing/preprocessing.py +263 -0
- segmentae/processing/simplifier.py +74 -0
- segmentae/utils/__init__.py +17 -0
- segmentae/utils/validation.py +94 -0
- segmentae-1.5.20.dist-info/METADATA +393 -0
- segmentae-1.5.20.dist-info/RECORD +34 -0
- segmentae-1.5.20.dist-info/WHEEL +5 -0
- segmentae-1.5.20.dist-info/licenses/LICENSE +21 -0
- segmentae-1.5.20.dist-info/top_level.txt +1 -0
segmentae/__init__.py
ADDED
@@ -0,0 +1,83 @@
+"""
+SegmentAE: A Python Library for Anomaly Detection Optimization
+
+SegmentAE enhances anomaly detection performance through the optimization of
+reconstruction error by integrating clustering methods with tabular autoencoders.
+
+Key Components:
+- Preprocessing: Data preparation with encoding, scaling, and imputation
+- Clustering: Multiple clustering algorithms (KMeans, GMM, Agglomerative)
+- SegmentAE: Main pipeline integrating autoencoders and clustering
+- Autoencoders: Dense, BatchNorm, and Ensemble implementations
+- Optimizer: Grid search for optimal configuration
+"""
+
+__version__ = "2.0.0"
+__author__ = "Luís Fernando da Silva Santos"
+
+from segmentae.autoencoders.batch_norm import BatchNormAutoencoder
+
+# Note: Autoencoders are kept in their original location
+# They should be imported directly when available:
+from segmentae.autoencoders.dense import DenseAutoencoder
+from segmentae.autoencoders.ensemble import EnsembleAutoencoder
+
+# Clustering
+from segmentae.clusters import Clustering
+
+# Core components
+from segmentae.core import (
+    ClusterModel,
+    EncoderType,
+    ImputerType,
+    PhaseType,
+    ScalerType,
+    ThresholdMetric,
+)
+
+# Data Sources
+from segmentae.data_sources import load_dataset
+
+# Metrics
+from segmentae.metrics import metrics_classification, metrics_regression
+
+# Optimization
+from segmentae.optimization import SegmentAE_Optimizer
+
+# Pipeline
+from segmentae.pipeline import SegmentAE
+
+# Preprocessing
+from segmentae.processing.preprocessing import Preprocessing
+
+__all__ = [
+    # Version info
+    '__version__',
+    '__author__',
+
+    # Core enums
+    'PhaseType',
+    'ClusterModel',
+    'ThresholdMetric',
+    'EncoderType',
+    'ScalerType',
+    'ImputerType',
+
+    # Main classes
+    'Preprocessing',
+    'Clustering',
+    'SegmentAE',
+    'SegmentAE_Optimizer',
+
+    # Metrics
+    'metrics_classification',
+    'metrics_regression',
+
+    # Data
+    'load_dataset',
+
+    # Autoencoders
+    'DenseAutoencoder',
+    'BatchNormAutoencoder',
+    'EnsembleAutoencoder',
+]
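
For orientation, a minimal sketch (not part of the diff) of the import surface this `__init__.py` exposes. The constructors of `Preprocessing`, `Clustering`, `SegmentAE`, and `SegmentAE_Optimizer` live in modules not shown in this excerpt, so only the names themselves are assumed here:

# Illustrative only: top-level API re-exported by segmentae/__init__.py
import segmentae

print(segmentae.__version__)  # note: reports "2.0.0" even though the wheel is versioned 1.5.20

from segmentae import (
    Preprocessing,        # data preparation (encoding, scaling, imputation)
    Clustering,           # KMeans / GMM / Agglomerative wrappers
    SegmentAE,            # main pipeline combining clustering and autoencoders
    SegmentAE_Optimizer,  # grid search over configurations
    DenseAutoencoder,
    BatchNormAutoencoder,
    EnsembleAutoencoder,
)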

segmentae/anomaly_detection.py
ADDED
@@ -0,0 +1,20 @@
+from segmentae.autoencoders.batch_norm import BatchNormAutoencoder
+from segmentae.autoencoders.dense import DenseAutoencoder
+from segmentae.autoencoders.ensemble import EnsembleAutoencoder
+from segmentae.clusters.clustering import Clustering
+
+# Metrics
+from segmentae.metrics.performance_metrics import metrics_classification, metrics_regression
+from segmentae.pipeline.segmentae import SegmentAE
+from segmentae.processing.preprocessing import Preprocessing
+
+__all__ = [
+    'SegmentAE',
+    'Preprocessing',
+    'Clustering',
+    'metrics_classification',
+    'metrics_regression',
+    'DenseAutoencoder',
+    'BatchNormAutoencoder',
+    'EnsembleAutoencoder',
+]
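
This module only re-exports the main classes from their concrete locations, so it reads as a backwards-compatibility shim for older `segmentae.anomaly_detection` imports. A quick illustrative check of that assumption (not part of the diff):

# Illustrative only: both import paths should resolve to the same class object.
from segmentae.anomaly_detection import SegmentAE as SegmentAE_compat
from segmentae.pipeline.segmentae import SegmentAE

assert SegmentAE is SegmentAE_compat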

segmentae/autoencoders/__init__.py
ADDED
@@ -0,0 +1,16 @@
+"""
+Autoencoders module for SegmentAE.
+
+This module provides autoencoder implementations including Dense,
+BatchNorm, and Ensemble autoencoders for anomaly detection.
+"""
+
+from segmentae.autoencoders.batch_norm import BatchNormAutoencoder
+from segmentae.autoencoders.dense import DenseAutoencoder
+from segmentae.autoencoders.ensemble import EnsembleAutoencoder
+
+__all__ = [
+    'DenseAutoencoder',
+    'BatchNormAutoencoder',
+    'EnsembleAutoencoder'
+]

segmentae/autoencoders/batch_norm.py
ADDED
@@ -0,0 +1,208 @@
+import warnings
+
+warnings.filterwarnings("ignore", category=DeprecationWarning)
+warnings.filterwarnings("ignore")
+from typing import List, Optional
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from keras.callbacks import EarlyStopping
+from keras.layers import BatchNormalization, Dense, Dropout, Input
+from keras.models import Model
+from keras.optimizers import SGD, Adadelta, Adagrad, Adam, Adamax, Nadam, RMSprop
+
+
+class BatchNormAutoencoder:
+    def __init__(self,
+                 hidden_dims: List[int] = [32, 16, 8],
+                 encoder_activation: str = 'relu',
+                 decoder_activation: str = 'relu',
+                 optimizer: str = 'adam',
+                 learning_rate: float = 0.001,
+                 epochs: int = 300,
+                 val_size: float = 0.15,
+                 stopping_patient: int = 10,
+                 dropout_rate: float = 0,
+                 batch_size: Optional[int] = None):
+        """
+        BatchNormAutoencoder is a class for building and training a batch normalization dense autoencoder model.
+
+        Parameters:
+        - hidden_dims (list): List of integers representing the sizes of hidden layers.
+        - encoder_activation (str): Activation function for the encoder layers. Possible options include 'relu', 'tanh', 'elu', 'selu' and 'linear'.
+          The chosen function should be appropriate for the type of data and the desired complexity of the model's representation.
+        - decoder_activation (str): Activation function for the decoder layers. Possible options are the same as for encoder_activation.
+        - optimizer_type (str): Adam is widely used due to its adaptive learning rate properties, which makes it effective for a wide range of problems.
+          SGD (Stochastic Gradient Descent) is one of the oldest and most studied optimization algorithms. It's simple but can be very effective, especially with the right learning rate schedules and momentum.
+          RMSprop is designed to solve some of SGD’s problems by using a moving average of squared gradients to normalize the gradient. This helps in adaptive learning rate adjustments.
+          Adagrad adjusts the learning rate based on the parameters. It performs larger updates for infrequent parameters and smaller updates for frequent parameters, which is useful for sparse data.
+          Adadelta is an extension of Adagrad that seeks to reduce its aggressive, monotonically decreasing learning rate. It does this by limiting the window of accumulated past gradients to some fixed size.
+          Adamax is a variant of Adam based on the infinity norm, which can sometimes outperform Adam, especially in models that are highly sensitive to the choice of hyperparameters.
+          Nadam combines Adam and Nesterov momentum, aiming to leverage the benefits of both.
+        - learning_rate (int): Learning rate for the Adam optimizer.
+        - epochs (int): Number of epochs for training the autoencoder.
+        - val_size (float): Fraction of the data to be used as validation data during training.
+        - stopping_patient (int): Number of epochs with no improvement after which training will be stopped.
+        - dropout_rate (float): The fraction of the input units to drop during training, which helps prevent overfitting by making the network's representations more robust. Typical values range from 0.1 to 0.5.
+        - batch_size (int): Number of samples per gradient update.
+        """
+        self.autoencoder = None
+        self.input_dim = None
+        self.hidden_dims = hidden_dims
+        self.encoder_activation = encoder_activation
+        self.decoder_activation = decoder_activation
+        self.optimizer = optimizer
+        self.learning_rate = learning_rate
+        self.epochs = epochs
+        self.val_size = val_size
+        self.stopping_patient = stopping_patient
+        self.dropout_rate = dropout_rate
+        self.batch_size = batch_size
+
+    def _get_optimizer(self):
+        optimizers = {
+            'adam': Adam(learning_rate=self.learning_rate),
+            'sgd': SGD(learning_rate=self.learning_rate),
+            'rmsprop': RMSprop(learning_rate=self.learning_rate),
+            'adagrad': Adagrad(learning_rate=self.learning_rate),
+            'adadelta': Adadelta(learning_rate=self.learning_rate),
+            'adamax': Adamax(learning_rate=self.learning_rate),
+            'nadam': Nadam(learning_rate=self.learning_rate)
+        }
+        if self.optimizer in optimizers:
+            return optimizers[self.optimizer]
+        else:
+            raise ValueError(f"Unsupported optimizer: {self.optimizer}. Supported optimizers are: {list(optimizers.keys())}")
+
+    def fit(self, input_data: pd.DataFrame):
+        """
+        Trains the BatchNormAutoencoder model on the provided input data. This method performs the following steps:
+
+        1. Data Preparation:
+           - Copies the input data to avoid modifying the original dataset.
+           - Determines and stores the input dimension (number of features).
+
+        2. Model Validation:
+           - Checks if any specified hidden layer sizes exceeds the number of input features considerable.
+           - Prints a warning if there is a risk of overfitting due to excessively large hidden layers.
+
+        3. Model Construction:
+           - Defines the input layer with a shape matching the input data's feature dimension.
+           - Sequentially adds dense layers for the encoder, each followed by batch normalization and dropout layers to regularize the network.
+           - Constructs the decoder by sequentially adding dense layers in reverse order (excluding the last encoder layer) to mirror the encoder's structure.
+           - Adds batch normalization and dropout layers to the decoder to maintain regularization.
+           - Concludes with a final dense layer using sigmoid activation to reconstruct the input data.
+
+        4. Model Compilation:
+           - Selects the optimizer based on the specified type using the `_get_optimizer` method.
+           - Compiles the autoencoder model with the chosen optimizer and mean squared error as the loss function.
+
+        5. Early Stopping Configuration:
+           - Configures early stopping to monitor validation loss.
+           - Defines stopping criteria to halt training if validation loss does not improve over a specified number of epochs, thereby preventing overfitting and conserving computational resources.
+
+        6. Model Training:
+           - Trains the autoencoder using the `fit` method with the following specifications:
+             - Uses the training data for both input (`x`) and target (`y`) as the autoencoder aims to reconstruct its input.
+             - Sets the number of epochs as specified.
+             - Specifies batch size if provided.
+             - Shuffles the training data at each epoch to ensure the model does not learn the data order.
+             - Splits a fraction of the training data for validation.
+             - Utilizes early stopping to monitor and control the training process based on validation performance.
+
+        7. Return:
+           - Returns the trained autoencoder model for further use or evaluation.
+
+        Parameters:
+        - input_data (pd.DataFrame): A pandas DataFrame containing the training data. Each row represents a sample, and each column represents a feature.
+
+        Returns:
+        - autoencoder (Model): The trained autoencoder model.
+        """
+
+        train = input_data.copy()
+
+        # Get input dimension
+        self.input_dim = train.shape[1]
+
+        # Verify and construct the hidden units
+        if np.max(self.hidden_dims) > 3*self.input_dim:
+            print("Layers neurons exceed considerably the number input features risking overfitting,"
+                  "it is suggested to reduce neurons to enhance generalization. \n")
+
+        # Define the input layer
+        input_layer = Input(shape=(self.input_dim,))
+
+        # Encoder layers with batch normalization
+        encoded = input_layer
+        for dim in self.hidden_dims:
+            encoded = Dense(dim, activation=self.encoder_activation)(encoded)
+            encoded = BatchNormalization()(encoded)
+            encoded = Dropout(self.dropout_rate)(encoded)
+
+        # Decoder layers with batch normalization
+        decoded = encoded
+        for dim in reversed(self.hidden_dims[:-1]):
+            decoded = Dense(dim, activation=self.decoder_activation)(decoded)
+            decoded = BatchNormalization()(decoded)
+            decoded = Dropout(self.dropout_rate)(decoded)
+        decoded = Dense(self.input_dim, activation='sigmoid')(decoded)
+
+        # Create and compile the model
+        self.autoencoder = Model(input_layer, decoded)
+        self.autoencoder.compile(optimizer=self._get_optimizer(),
+                                 loss='mean_squared_error')
+
+        # Define early stopping criteria
+        early_stopping = EarlyStopping(monitor='val_loss', patience=self.stopping_patient,
+                                       verbose=1, mode='min', restore_best_weights=True)
+
+        # Train the model
+        self.history = self.autoencoder.fit(x=train, y=train, epochs=self.epochs, batch_size=self.batch_size,
+                                            shuffle=True, validation_split=self.val_size, callbacks=[early_stopping])
+
+        return self.autoencoder
+
+    def summary(self):
+        """
+        Print the summary of the autoencoder model.
+        """
+        if self.autoencoder is not None:
+            self.autoencoder.summary()
+        else:
+            print("Model is not built yet. Please call build_model() or fit().")
+
+    def evaluate(self, input_data: pd.DataFrame):
+        """
+        Evaluate the batch normalization dense autoencoder model on given input data.
+        """
+        return self.autoencoder.evaluate(input_data, input_data)
+
+    def predict(self, input_data: pd.DataFrame):
+        """
+        Use the batch normalization dense autoencoder model to generate predictions on given input data.
+        """
+        return self.autoencoder.predict(input_data)
+
+    def save_model(self, file_path):
+        """
+        Save the trained BatchNorm model to a file.
+        """
+        self.autoencoder.save(file_path)
+
+    def plot_training_loss(self):
+        """
+        Plot the training and validation loss history.
+        """
+        if self.history is None:
+            print("No training history available. Please fit the model first.")
+            return
+
+        plt.plot(self.history.history['loss'], label='Training Loss')
+        plt.plot(self.history.history['val_loss'], label='Validation Loss')
+        plt.title('Training and Validation Loss')
+        plt.xlabel('Epochs')
+        plt.ylabel('Loss')
+        plt.legend()
+        plt.show()
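
Because the final decoder layer uses a sigmoid activation, reconstructed values fall in [0, 1], so inputs are expected to be scaled to that range. A minimal usage sketch (not part of the diff) with synthetic, already-scaled data; the reconstruction-error computation at the end is only an illustration of the quantity the library thresholds for anomaly detection:

# Illustrative usage sketch for BatchNormAutoencoder (synthetic data, values already in [0, 1])
import numpy as np
import pandas as pd
from segmentae.autoencoders.batch_norm import BatchNormAutoencoder

rng = np.random.default_rng(0)
train_df = pd.DataFrame(rng.random((500, 10)))   # 500 rows, 10 features in [0, 1]

ae = BatchNormAutoencoder(hidden_dims=[8, 4], epochs=50, batch_size=32)
ae.fit(train_df)      # trains with early stopping on a 15% validation split
ae.summary()

# Per-sample reconstruction error (MSE across features)
reconstruction = ae.predict(train_df)
mse = np.mean((train_df.to_numpy() - reconstruction) ** 2, axis=1)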

segmentae/autoencoders/dense.py
ADDED
@@ -0,0 +1,211 @@
+import warnings
+
+warnings.filterwarnings("ignore", category=DeprecationWarning)
+warnings.filterwarnings("ignore")
+from typing import List, Optional
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from keras.callbacks import EarlyStopping
+from keras.layers import Dense, Dropout, Input
+from keras.models import Model
+from keras.optimizers import SGD, Adadelta, Adagrad, Adam, Adamax, Nadam, RMSprop
+
+
+class DenseAutoencoder:
+    def __init__(self,
+                 hidden_dims: List[int] = [12, 8, 4],
+                 encoder_activation: str = 'relu',  # Activation function for encoder layers
+                 decoder_activation: str = 'relu',  # Activation function for decoder layers
+                 optimizer: str = 'adam',
+                 learning_rate: float = 0.001,
+                 epochs: int = 300,
+                 val_size: float = 0.15,
+                 stopping_patient: int = 10,
+                 dropout_rate: float = 0,
+                 batch_size: Optional[int] = None):
+        """
+        DenseAutoencoder is a class for building and training a dense autoencoder model.
+
+        Parameters:
+        - hidden_dims (list): List of integers representing the sizes of hidden layers.
+        - encoder_activation (str): Activation function for the encoder layers. Possible options include 'relu', 'tanh', 'elu', 'selu' and 'linear'.
+          The chosen function should be appropriate for the type of data and the desired complexity of the model's representation.
+        - decoder_activation (str): Activation function for the decoder layers. Possible options are the same as for encoder_activation.
+        - optimizer (str): Adam is widely used due to its adaptive learning rate properties, which makes it effective for a wide range of problems.
+          SGD (Stochastic Gradient Descent) is one of the oldest and most studied optimization algorithms. It's simple but can be very effective, especially with the right learning rate schedules and momentum.
+          RMSprop is designed to solve some of SGD’s problems by using a moving average of squared gradients to normalize the gradient. This helps in adaptive learning rate adjustments.
+          Adagrad adjusts the learning rate based on the parameters. It performs larger updates for infrequent parameters and smaller updates for frequent parameters, which is useful for sparse data.
+          Adadelta is an extension of Adagrad that seeks to reduce its aggressive, monotonically decreasing learning rate. It does this by limiting the window of accumulated past gradients to some fixed size.
+          Adamax is a variant of Adam based on the infinity norm, which can sometimes outperform Adam, especially in models that are highly sensitive to the choice of hyperparameters.
+          Nadam combines Adam and Nesterov momentum, aiming to leverage the benefits of both.
+        - learning_rate (float): Learning rate for the Adam optimizer.
+        - epochs (int): Number of epochs for training the autoencoder.
+        - val_size (float): Fraction of the data to be used as validation data during training.
+        - stopping_patient (int): Number of epochs with no improvement after which training will be stopped.
+        - dropout_rate (float): The fraction of the input units to drop during training, which helps prevent overfitting by making the network's representations more robust. Typical values range from 0.1 to 0.5.
+        - batch_size (int): Number of samples per gradient update.
+        """
+        self.autoencoder = None
+        self.input_dim = None
+        self.hidden_dims = hidden_dims
+        self.encoder_activation = encoder_activation
+        self.decoder_activation = decoder_activation
+        self.optimizer = optimizer
+        self.learning_rate = learning_rate
+        self.epochs = epochs
+        self.val_size = val_size
+        self.stopping_patient = stopping_patient
+        self.dropout_rate = dropout_rate
+        self.batch_size = batch_size
+
+    def _get_optimizer(self):
+        optimizers = {
+            'adam': Adam(learning_rate=self.learning_rate),
+            'sgd': SGD(learning_rate=self.learning_rate),
+            'rmsprop': RMSprop(learning_rate=self.learning_rate),
+            'adagrad': Adagrad(learning_rate=self.learning_rate),
+            'adadelta': Adadelta(learning_rate=self.learning_rate),
+            'adamax': Adamax(learning_rate=self.learning_rate),
+            'nadam': Nadam(learning_rate=self.learning_rate)
+        }
+        if self.optimizer in optimizers:
+            return optimizers[self.optimizer]
+        else:
+            raise ValueError(f"Unsupported optimizer: {self.optimizer}. Supported optimizers are: {list(optimizers.keys())}")
+
+    def fit(self, input_data: pd.DataFrame):
+        """
+        Trains the DenseAutoencoder model on the provided input data. This method systematically performs the following steps:
+
+        1. Data Preparation:
+           - The input data is copied to ensure that the original dataset remains unaltered during the training process.
+           - The dimensionality of the input data (number of features) is determined and stored in the instance variable `self.input_dim`.
+
+        2. Model Validation:
+           - A validation check is performed to ensure that none of the specified hidden layer sizes exceed three times the number of input features.
+           - If any hidden layer size surpasses this threshold, a warning message is printed, alerting the user about the potential risk of overfitting. This serves as a guideline to adjust the layer sizes for better model generalization.
+
+        3. Model Construction:
+           - **Input Layer**: An input layer is defined with a shape corresponding to the number of features in the input data, serving as the entry point for the data into the neural network.
+           - **Encoder Layers**: The encoder part of the network is constructed sequentially:
+             - Dense layers are added according to the specified hidden dimensions.
+             - Each dense layer uses the specified activation function and includes L2 regularization to penalize large weights, helping to prevent overfitting.
+             - Dropout layers are added after each dense layer to further reduce overfitting by randomly setting a fraction of input units to zero during training.
+           - **Decoder Layers**: The decoder part of the network is constructed to mirror the encoder:
+             - Dense layers are added in reverse order of the encoder's hidden dimensions, excluding the last encoder layer to maintain symmetry.
+             - Dropout layers are added to the decoder layers similarly to enhance regularization.
+             - A final dense layer with sigmoid activation is added to reconstruct the input data, ensuring the output values are in the range [0, 1].
+
+        4. Model Compilation:
+           - The optimizer is selected using the `_get_optimizer` method, which retrieves the appropriate optimizer instance based on the specified optimizer type.
+           - The autoencoder model is compiled with the chosen optimizer and mean squared error as the loss function. This loss function measures the reconstruction error between the input data and its reconstruction by the autoencoder.
+
+        5. Early Stopping Configuration:
+           - Early stopping is configured to monitor the validation loss during training.
+           - The training process will be halted if the validation loss does not improve for a specified number of epochs (`stopping_patient`). This prevents overfitting and saves computational resources by stopping training once the model stops improving.
+           - The best model weights, as determined by the lowest validation loss, are restored at the end of training.
+
+        6. Model Training:
+           - The autoencoder model is trained using the `fit` method with the following specifications:
+             - Both input (`x`) and target (`y`) data are set to the training data, as the autoencoder aims to learn to reconstruct its input.
+             - The number of training epochs is set to the specified value (`epochs`).
+             - The batch size for gradient updates is set if provided.
+             - Data shuffling is enabled at each epoch to ensure the model does not learn the order of the training data, improving generalization.
+             - A fraction of the training data is used for validation, specified by `val_size`, to monitor the model's performance on unseen data during training.
+             - Early stopping is employed through callbacks to control the training process based on validation performance.
+
+        7. Return:
+           - The trained autoencoder model is returned for further use, such as evaluating its performance on test data or using it for feature extraction.
+        """
+
+        train = input_data.copy()
+
+        # Get input dimension
+        self.input_dim = train.shape[1]
+
+        # Verify and construct the hidden units
+        if np.max(self.hidden_dims) > 3*self.input_dim:
+            print("Layers neurons exceeds considerably the number input features risking overfitting, "
+                  "it is suggested to reduce neurons to enhance generalization. \n")
+
+        # Define the input layer
+        input_layer = Input(shape=(self.input_dim,))
+
+        # Encoder layers
+        encoder = input_layer
+        for dim in self.hidden_dims:
+            encoder = Dense(dim, activation=self.encoder_activation)(encoder)
+            encoder = Dropout(self.dropout_rate)(encoder)
+
+        # Decoder layers
+        decoder = encoder
+        for dim in reversed(self.hidden_dims[:-1]):
+            decoder = Dense(dim, activation=self.decoder_activation)(decoder)
+            decoder = Dropout(self.dropout_rate)(decoder)
+        decoder = Dense(self.input_dim, activation="sigmoid")(decoder)
+
+        # Create the autoencoder model
+        self.autoencoder = Model(inputs=input_layer, outputs=decoder)
+
+        # Compile the model
+        self.autoencoder.compile(optimizer=self._get_optimizer(),
+                                 loss="mean_squared_error")
+
+        # Define early stopping criteria
+        early_stopping = EarlyStopping(monitor='val_loss',
+                                       patience=self.stopping_patient,
+                                       verbose=1,
+                                       mode='min',
+                                       restore_best_weights=True)
+
+        # Train the model
+        self.history = self.autoencoder.fit(x=train, y=train, epochs=self.epochs, batch_size=self.batch_size,
+                                            shuffle=True, validation_split=self.val_size, verbose=1,
+                                            callbacks=[early_stopping])
+
+        return self.autoencoder
+
+    def summary(self):
+        """
+        Print the summary of the autoencoder model.
+        """
+        if self.autoencoder is not None:
+            self.autoencoder.summary()
+        else:
+            print("Model is not built yet. Please call build_model() or fit().")
+
+    def evaluate(self, input_data: pd.DataFrame):
+        """
+        Evaluate the autoencoder model on given input data.
+        """
+        return self.autoencoder.evaluate(input_data, input_data)
+
+    def predict(self, input_data: pd.DataFrame):
+        """
+        Use the autoencoder model to generate predictions on given input data.
+        """
+        return self.autoencoder.predict(input_data)
+
+    def save_model(self, file_path):
+        """
+        Save the trained Dense model to a file.
+        """
+        self.autoencoder.save(file_path)
+
+    def plot_training_loss(self):
+        """
+        Plot the training and validation loss history.
+        """
+        if self.history is None:
+            print("No training history available. Please fit the model first.")
+            return
+
+        plt.plot(self.history.history['loss'], label='Training Loss')
+        plt.plot(self.history.history['val_loss'], label='Validation Loss')
+        plt.title('Training and Validation Loss')
+        plt.xlabel('Epochs')
+        plt.ylabel('Loss')
+        plt.legend()
+        plt.show()
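
As with the BatchNorm variant, the per-sample reconstruction error is the anomaly signal. The library's own thresholding logic presumably lives in segmentae/pipeline/reconstruction.py (listed above but not shown in this excerpt), so the percentile cut-off below is only an illustrative stand-in, not the package's method:

# Illustrative only: flag anomalies with a simple percentile threshold on reconstruction error.
import numpy as np
import pandas as pd
from segmentae.autoencoders.dense import DenseAutoencoder

rng = np.random.default_rng(1)
data = pd.DataFrame(rng.random((400, 6)))        # synthetic data already scaled to [0, 1]

ae = DenseAutoencoder(hidden_dims=[4, 2], epochs=30, batch_size=64)
ae.fit(data)

errors = np.mean((data.to_numpy() - ae.predict(data)) ** 2, axis=1)
threshold = np.percentile(errors, 95)            # hypothetical cut-off choice
anomalies = errors > threshold
print(f"Flagged {anomalies.sum()} of {len(data)} rows as anomalous")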