segmentae 1.5.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- segmentae/__init__.py +83 -0
- segmentae/anomaly_detection.py +20 -0
- segmentae/autoencoders/__init__.py +16 -0
- segmentae/autoencoders/batch_norm.py +208 -0
- segmentae/autoencoders/dense.py +211 -0
- segmentae/autoencoders/ensemble.py +219 -0
- segmentae/clusters/__init__.py +18 -0
- segmentae/clusters/clustering.py +171 -0
- segmentae/clusters/models.py +438 -0
- segmentae/clusters/registry.py +75 -0
- segmentae/core/__init__.py +65 -0
- segmentae/core/base.py +108 -0
- segmentae/core/constants.py +91 -0
- segmentae/core/exceptions.py +60 -0
- segmentae/core/types.py +55 -0
- segmentae/data_sources/__init__.py +3 -0
- segmentae/data_sources/examples.py +198 -0
- segmentae/metrics/__init__.py +6 -0
- segmentae/metrics/performance_metrics.py +119 -0
- segmentae/optimization/__init__.py +6 -0
- segmentae/optimization/optimizer.py +375 -0
- segmentae/pipeline/__init__.py +21 -0
- segmentae/pipeline/reconstruction.py +214 -0
- segmentae/pipeline/segmentae.py +562 -0
- segmentae/processing/__init__.py +21 -0
- segmentae/processing/preprocessing.py +263 -0
- segmentae/processing/simplifier.py +74 -0
- segmentae/utils/__init__.py +17 -0
- segmentae/utils/validation.py +94 -0
- segmentae-1.5.20.dist-info/METADATA +393 -0
- segmentae-1.5.20.dist-info/RECORD +34 -0
- segmentae-1.5.20.dist-info/WHEEL +5 -0
- segmentae-1.5.20.dist-info/licenses/LICENSE +21 -0
- segmentae-1.5.20.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: segmentae
|
|
3
|
+
Version: 1.5.20
|
|
4
|
+
Summary: SegmentAE: A Python Library for Anomaly Detection Optimization
|
|
5
|
+
Home-page: https://github.com/TsLu1s/SegmentAE
|
|
6
|
+
Author: Luís Fernando da Silva Santos
|
|
7
|
+
Author-email: luisf_ssantos@hotmail.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Keywords: python,data science,machine learning,deep learning,neural networks,autoencoder,clustering,anomaly detection,novelty detection,fraud detection,data preprocessing
|
|
10
|
+
Classifier: Intended Audience :: Education
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Intended Audience :: Customer Service
|
|
14
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
|
15
|
+
Classifier: Intended Audience :: Healthcare Industry
|
|
16
|
+
Classifier: Intended Audience :: Telecommunications Industry
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
20
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
21
|
+
Classifier: Operating System :: OS Independent
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: pandas>=1.2.0
|
|
27
|
+
Requires-Dist: numpy>=1.19.5
|
|
28
|
+
Requires-Dist: atlantic==1.1.67
|
|
29
|
+
Requires-Dist: tensorflow>=2.10.0
|
|
30
|
+
Requires-Dist: ucimlrepo>=0.0.7
|
|
31
|
+
Requires-Dist: scipy>=1.11.4
|
|
32
|
+
Requires-Dist: pydantic==2.0.0
|
|
33
|
+
Requires-Dist: matplotlib==3.9.3
|
|
34
|
+
Dynamic: author
|
|
35
|
+
Dynamic: author-email
|
|
36
|
+
Dynamic: classifier
|
|
37
|
+
Dynamic: description
|
|
38
|
+
Dynamic: description-content-type
|
|
39
|
+
Dynamic: home-page
|
|
40
|
+
Dynamic: keywords
|
|
41
|
+
Dynamic: license
|
|
42
|
+
Dynamic: license-file
|
|
43
|
+
Dynamic: requires-dist
|
|
44
|
+
Dynamic: summary
|
|
45
|
+
|
|
46
|
+
[![LinkedIn][linkedin-shield]][linkedin-url]
|
|
47
|
+
[![Contributors][contributors-shield]][contributors-url]
|
|
48
|
+
[![Stargazers][stars-shield]][stars-url]
|
|
49
|
+
[![MIT License][license-shield]][license-url]
|
|
50
|
+
[![Downloads][downloads-shield]][downloads-url]
|
|
51
|
+
[![Month Downloads][downloads-month-shield]][downloads-month-url]
|
|
52
|
+
|
|
53
|
+
[contributors-shield]: https://img.shields.io/github/contributors/TsLu1s/SegmentAE.svg?style=for-the-badge&logo=github&logoColor=white
|
|
54
|
+
[contributors-url]: https://github.com/TsLu1s/SegmentAE/graphs/contributors
|
|
55
|
+
[stars-shield]: https://img.shields.io/github/stars/TsLu1s/SegmentAE.svg?style=for-the-badge&logo=github&logoColor=white
|
|
56
|
+
[stars-url]: https://github.com/TsLu1s/SegmentAE/stargazers
|
|
57
|
+
[license-shield]: https://img.shields.io/github/license/TsLu1s/SegmentAE.svg?style=for-the-badge&logo=opensource&logoColor=white
|
|
58
|
+
[license-url]: https://github.com/TsLu1s/SegmentAE/blob/main/LICENSE
|
|
59
|
+
[linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
|
|
60
|
+
[linkedin-url]: https://www.linkedin.com/in/luisfssantos98/
|
|
61
|
+
[downloads-shield]: https://static.pepy.tech/personalized-badge/segmentae?period=total&units=international_system&left_color=grey&right_color=blue&left_text=Total%20Downloads
|
|
62
|
+
[downloads-url]: https://pepy.tech/project/segmentae
|
|
63
|
+
[downloads-month-shield]: https://static.pepy.tech/personalized-badge/segmentae?period=month&units=international_system&left_color=grey&right_color=blue&left_text=Month%20Downloads
|
|
64
|
+
[downloads-month-url]: https://pepy.tech/project/segmentae
|
|
65
|
+
|
|
66
|
+
## Framework Overview
|
|
67
|
+
|
|
68
|
+
`SegmentAE` is designed to enhance anomaly detection performance through the optimization of reconstruction error by integrating and intersecting clustering methods with tabular autoencoders. Built with enterprise-grade architecture, it provides a versatile, scalable, and robust solution for anomaly detection applications in domains such as financial fraud detection, network security, and industrial monitoring.
|
|
69
|
+
|
|
70
|
+
### Key Architectural Features (v2.0+)
|
|
71
|
+
|
|
72
|
+
- **Professional Architecture**: Clean separation of concerns with robust principles
|
|
73
|
+
- **Type Safety**: Comprehensive Pydantic validation and type hints throughout
|
|
74
|
+
- **Design Patterns**: Registry, Strategy, and Template Method patterns
|
|
75
|
+
- **Enum-Based Configuration**: Type-safe constants for all parameters
|
|
76
|
+
- **Custom Exceptions**: Informative error messages with actionable suggestions
|
|
77
|
+
|
|
78
|
+
## Key Features and Capabilities
|
|
79
|
+
|
|
80
|
+
### 1. General Applicability on Tabular Datasets
|
|
81
|
+
|
|
82
|
+
SegmentAE is engineered to handle a wide range of tabular datasets, making it suitable for various anomaly detection tasks across different use case contexts. It can be seamlessly integrated into diverse applications, ensuring broad utility and adaptability.
|
|
83
|
+
|
|
84
|
+
### 2. Optimization and Customization
|
|
85
|
+
|
|
86
|
+
The framework offers complete configurability for each component of the anomaly detection pipeline, including:
|
|
87
|
+
- **Data Preprocessing**: Encoding, scaling, and imputation with Pydantic validation
|
|
88
|
+
- **Clustering Algorithms**: Registry-based clustering with easy extensibility
|
|
89
|
+
- **Autoencoder Integration**: Support for custom Keras/TensorFlow models or built-in implementations
|
|
90
|
+
|
|
91
|
+
Each component can be fine-tuned to achieve optimal performance tailored to specific use cases.
|
|
92
|
+
|
|
93
|
+
### 3. Enhanced Detection Performance
|
|
94
|
+
|
|
95
|
+
By leveraging a combination of clustering algorithms and advanced anomaly detection techniques, SegmentAE aims to improve the accuracy and reliability of anomaly detection. The integration of tabular autoencoders with clustering mechanisms ensures that the framework effectively captures and identifies different patterns in the input data, optimizing the reconstruction error for each cluster, thereby enhancing predictive performance.
|
|
96
|
+
|
|
97
|
+
## Main Development Tools
|
|
98
|
+
|
|
99
|
+
Major frameworks used to build this project:
|
|
100
|
+
|
|
101
|
+
* [TensorFlow](https://www.tensorflow.org/)
|
|
102
|
+
* [Keras](https://keras.io/)
|
|
103
|
+
* [Scikit-Learn](https://scikit-learn.org/stable/)
|
|
104
|
+
* [Atlantic](https://pypi.org/project/atlantic/)
|
|
105
|
+
* [Pydantic](https://pydantic-docs.helpmanual.io/)
|
|
106
|
+
|
|
107
|
+
## Where to Get It
|
|
108
|
+
|
|
109
|
+
Binary installer for the latest released version is available at the Python Package Index [(PyPI)](https://pypi.org/project/segmentae/).
|
|
110
|
+
|
|
111
|
+
GitHub Project Link: [https://github.com/TsLu1s/SegmentAE](https://github.com/TsLu1s/SegmentAE)
|
|
112
|
+
|
|
113
|
+
## Installation
|
|
114
|
+
|
|
115
|
+
To install this package from the PyPI repository, run the following command:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
pip install segmentae
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## SegmentAE - Technical Components and Pipeline Structure
|
|
122
|
+
|
|
123
|
+
The SegmentAE framework consists of several integrated components, each playing a critical role in the optimization of anomaly detection through clustering and tabular autoencoders. The pipeline is structured with professional design patterns to ensure seamless data flow and modular customization.
|
|
124
|
+
|
|
125
|
+
### 1. Data Preprocessing
|
|
126
|
+
|
|
127
|
+
Proper preprocessing is crucial for ensuring the quality and consistency of data. The preprocessing module now includes:
|
|
128
|
+
|
|
129
|
+
- **Pydantic Validation**: Automatic type checking and conversion
|
|
130
|
+
- **Type-Safe Configuration**: Enum-based parameter selection
|
|
131
|
+
- **Missing Value Imputation**: Simple statistical imputation methods
|
|
132
|
+
- **Normalization**: MinMax, Standard, and Robust scaling options
|
|
133
|
+
- **Categorical Encoding**: Inverse Frequency, Label, and One-Hot Encoding
|
|
134
|
+
|
|
135
|
+
**Example:**
|
|
136
|
+
```python
|
|
137
|
+
from segmentae.anomaly_detection import Preprocessing
|
|
138
|
+
from segmentae.core import EncoderType, ScalerType
|
|
139
|
+
|
|
140
|
+
# Type-safe configuration with enums
|
|
141
|
+
pr = Preprocessing(
|
|
142
|
+
encoder=EncoderType.IFREQUENCY,
|
|
143
|
+
scaler=ScalerType.MINMAX,
|
|
144
|
+
imputer="Simple" # Strings are also supported
|
|
145
|
+
)
|
|
146
|
+
pr.fit(X_train)
|
|
147
|
+
X_transformed = pr.transform(X_test)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### 2. Clustering
|
|
151
|
+
|
|
152
|
+
Clustering forms the backbone of the SegmentAE framework and is designed for easy extensibility:
|
|
153
|
+
|
|
154
|
+
- **Registry Pattern**: Clean model registration and instantiation
|
|
155
|
+
- **Type Safety**: Pydantic validation for all parameters
|
|
156
|
+
- **Four Algorithms**: K-Means, MiniBatch K-Means, Gaussian Mixture, Agglomerative
|
|
157
|
+
- **Extensible Design**: Easy to add new clustering algorithms
|
|
158
|
+
|
|
159
|
+
**Example:**
|
|
160
|
+
```python
|
|
161
|
+
from segmentae.anomaly_detection import Clustering
|
|
162
|
+
from segmentae.core import ClusterModel
|
|
163
|
+
|
|
164
|
+
cl = Clustering(
|
|
165
|
+
cluster_model=[ClusterModel.KMEANS], # Enum-based
|
|
166
|
+
n_clusters=3
|
|
167
|
+
)
|
|
168
|
+
cl.clustering_fit(X_train)
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### 3. Anomaly Detection - Autoencoders
|
|
172
|
+
|
|
173
|
+
The core of the SegmentAE framework employs advanced autoencoder architectures:
|
|
174
|
+
|
|
175
|
+
- **Three Baseline Implementations**: Dense, BatchNorm, and Ensemble autoencoders
|
|
176
|
+
- **Custom Model Support**: Integrate any Keras/TensorFlow model
|
|
177
|
+
- **Full Customization**: Network architecture, training epochs, activation layers, and more
|
|
178
|
+
- **Type-Safe Integration**: Validated through protocols
|
|
179
|
+
|
|
180
|
+
The framework includes three baseline autoencoder algorithms for user application, allowing complete customization of network architecture, training parameters, and activation functions.
|
|
181
|
+
|
|
182
|
+
**Custom Model Integration:**
|
|
183
|
+
You can build your own autoencoder model (Keras-based) and integrate it seamlessly into the SegmentAE pipeline →
|
|
184
|
+
<a href="https://github.com/TsLu1s/SegmentAE/blob/main/examples/basic_model.py" style="text-decoration:none;">
|
|
185
|
+
<img src="https://img.shields.io/badge/Custom%20Model-blue?style=for-the-badge&logo=readme&logoColor=white" alt="Custom Model">
|
|
186
|
+
</a>
|
|
187
|
+
|
|
188
|
+
**Unlabeled Data Support:**
|
|
189
|
+
An application example for fully unlabeled data is available here →
|
|
190
|
+
<a href="https://github.com/TsLu1s/SegmentAE/blob/main/examples/unlabeled_application.py" style="text-decoration:none;">
|
|
191
|
+
<img src="https://img.shields.io/badge/Unlabeled%20Example-blue?style=for-the-badge&logo=readme&logoColor=white" alt="Unlabeled Example">
|
|
192
|
+
</a>
|
|
193
|
+
|
|
194
|
+
## SegmentAE - Predictive Application
|
|
195
|
+
|
|
196
|
+
The following example demonstrates the complete workflow from data loading to anomaly detection using a DenseAutoencoder integrated with KMeans clustering.
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
import pandas as pd
|
|
200
|
+
from segmentae.data_sources.examples import load_dataset
|
|
201
|
+
from segmentae.anomaly_detection import (
|
|
202
|
+
SegmentAE,
|
|
203
|
+
Preprocessing,
|
|
204
|
+
Clustering,
|
|
205
|
+
DenseAutoencoder
|
|
206
|
+
)
|
|
207
|
+
from sklearn.model_selection import train_test_split
|
|
208
|
+
|
|
209
|
+
############################################################################################
|
|
210
|
+
### Data Loading
|
|
211
|
+
|
|
212
|
+
train, test, target = load_dataset(
|
|
213
|
+
dataset_selection='htru2_dataset', # Data Loading Example
|
|
214
|
+
split_ratio=0.75
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
test, future_data = train_test_split(test, train_size=0.9, random_state=5)
|
|
218
|
+
|
|
219
|
+
# Reset indices (required)
|
|
220
|
+
train = train.reset_index(drop=True)
|
|
221
|
+
test = test.reset_index(drop=True)
|
|
222
|
+
future_data = future_data.reset_index(drop=True)
|
|
223
|
+
|
|
224
|
+
# Separate features and targets
|
|
225
|
+
X_train, y_train = train.drop(columns=[target]).copy(), train[target].astype(int)
|
|
226
|
+
X_test, y_test = test.drop(columns=[target]).copy(), test[target].astype(int)
|
|
227
|
+
X_future_data = future_data.drop(columns=[target]).copy()
|
|
228
|
+
|
|
229
|
+
############################################################################################
|
|
230
|
+
### Preprocessing
|
|
231
|
+
|
|
232
|
+
pr = Preprocessing(
|
|
233
|
+
encoder="IFrequencyEncoder", # Options: "IFrequencyEncoder", "LabelEncoder",
|
|
234
|
+
scaler="MinMaxScaler", # "OneHotEncoder", None
|
|
235
|
+
imputer=None # Options: "Simple", None
|
|
236
|
+
) # Note: Advanced imputation removed in v2.0
|
|
237
|
+
|
|
238
|
+
pr.fit(X=X_train)
|
|
239
|
+
X_train = pr.transform(X=X_train)
|
|
240
|
+
X_test = pr.transform(X=X_test)
|
|
241
|
+
X_future_data = pr.transform(X=X_future_data)
|
|
242
|
+
|
|
243
|
+
############################################################################################
|
|
244
|
+
### Clustering Implementation
|
|
245
|
+
|
|
246
|
+
cl_model = Clustering(
|
|
247
|
+
cluster_model=["KMeans"], # Options: KMeans, MiniBatchKMeans, GMM, Agglomerative
|
|
248
|
+
n_clusters=3
|
|
249
|
+
)
|
|
250
|
+
cl_model.clustering_fit(X=X_train)
|
|
251
|
+
|
|
252
|
+
############################################################################################
|
|
253
|
+
### Autoencoder Implementation
|
|
254
|
+
|
|
255
|
+
denseAutoencoder = DenseAutoencoder(
|
|
256
|
+
hidden_dims=[16, 12, 8, 4],
|
|
257
|
+
encoder_activation='relu',
|
|
258
|
+
decoder_activation='relu',
|
|
259
|
+
optimizer='adam',
|
|
260
|
+
learning_rate=0.001,
|
|
261
|
+
epochs=150,
|
|
262
|
+
val_size=0.15,
|
|
263
|
+
stopping_patient=20,
|
|
264
|
+
dropout_rate=0.1,
|
|
265
|
+
batch_size=None
|
|
266
|
+
)
|
|
267
|
+
denseAutoencoder.fit(input_data=X_train)
|
|
268
|
+
denseAutoencoder.summary()
|
|
269
|
+
|
|
270
|
+
############################################################################################
|
|
271
|
+
### Autoencoder + Clustering Integration
|
|
272
|
+
|
|
273
|
+
sg = SegmentAE(ae_model=denseAutoencoder, cl_model=cl_model)
|
|
274
|
+
|
|
275
|
+
############################################################################################
|
|
276
|
+
### Train Reconstruction
|
|
277
|
+
|
|
278
|
+
sg.reconstruction(
|
|
279
|
+
input_data=X_train,
|
|
280
|
+
threshold_metric='mse' # Options: mse, mae, rmse, max_error
|
|
281
|
+
)
|
|
282
|
+
|
|
283
|
+
############################################################################################
|
|
284
|
+
### Reconstruction Performance Evaluation
|
|
285
|
+
|
|
286
|
+
results = sg.evaluation(
|
|
287
|
+
input_data=X_test,
|
|
288
|
+
target_col=y_test,
|
|
289
|
+
threshold_ratio=2.0 # Threshold multiplier
|
|
290
|
+
)
|
|
291
|
+
|
|
292
|
+
# Access test metadata by cluster
|
|
293
|
+
preds_test, recon_metrics_test = sg.preds_test, sg.reconstruction_test
|
|
294
|
+
|
|
295
|
+
# View global and per-cluster metrics
|
|
296
|
+
print(results['global metrics'])
|
|
297
|
+
print(results['clusters metrics'])
|
|
298
|
+
|
|
299
|
+
############################################################################################
|
|
300
|
+
### Anomaly Detection Predictions
|
|
301
|
+
|
|
302
|
+
predictions = sg.detections(
|
|
303
|
+
input_data=X_future_data,
|
|
304
|
+
threshold_ratio=2.0
|
|
305
|
+
)
|
|
306
|
+
|
|
307
|
+
print(predictions['Predicted Anomalies'].value_counts())
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
## Grid Search Optimizer
|
|
311
|
+
|
|
312
|
+
SegmentAE includes a comprehensive optimization methodology through the `SegmentAE_Optimizer` class to systematically identify optimal configurations.
|
|
313
|
+
|
|
314
|
+
The optimizer evaluates combinations of:
|
|
315
|
+
- Multiple autoencoders
|
|
316
|
+
- Different clustering algorithms
|
|
317
|
+
- Various cluster numbers
|
|
318
|
+
- Different threshold ratios
|
|
319
|
+
|
|
320
|
+
**Example:**
|
|
321
|
+
```python
|
|
322
|
+
from segmentae.optimization import SegmentAE_Optimizer
|
|
323
|
+
|
|
324
|
+
optimizer = SegmentAE_Optimizer(
|
|
325
|
+
autoencoder_models=[autoencoder1, autoencoder2],
|
|
326
|
+
n_clusters_list=[2, 3, 4],
|
|
327
|
+
cluster_models=["KMeans", "GMM", "MiniBatchKMeans"],
|
|
328
|
+
threshold_ratios=[1, 1.5, 2, 3],
|
|
329
|
+
performance_metric='f1_score' # or 'Accuracy', 'Precision', 'Recall'
|
|
330
|
+
)
|
|
331
|
+
|
|
332
|
+
# Run grid search
|
|
333
|
+
best_model = optimizer.optimize(X_train, X_test, y_test)
|
|
334
|
+
|
|
335
|
+
# View results
|
|
336
|
+
print(f"Best Performance: {optimizer.best_performance}")
|
|
337
|
+
print("Best Configuration:")
|
|
338
|
+
print(f" - Clusters: {optimizer.best_n_clusters}")
|
|
339
|
+
print(f" - Threshold: {optimizer.best_threshold_ratio}")
|
|
340
|
+
print("\nLeaderboard:")
|
|
341
|
+
print(optimizer.leaderboard.head(10))
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
For a complete optimizer example → <a href="https://github.com/TsLu1s/SegmentAE/blob/main/examples/optimizer_application.py" style="text-decoration:none;">
|
|
345
|
+
<img src="https://img.shields.io/badge/Optimizer%20Application-blue?style=for-the-badge&logo=readme&logoColor=white" alt="Optimizer Application">
|
|
346
|
+
</a>
|
|
347
|
+
|
|
348
|
+
## Template Example Applications
|
|
349
|
+
|
|
350
|
+
### 1. Basic Custom Model
|
|
351
|
+
Use your own Keras autoencoder with SegmentAE:
|
|
352
|
+
- **Example:** [basic_model.py](https://github.com/TsLu1s/SegmentAE/blob/main/examples/basic_model.py)
|
|
353
|
+
- Shows custom Sequential model integration
|
|
354
|
+
- Demonstrates multiple threshold evaluation
|
|
355
|
+
|
|
356
|
+
### 2. Baseline Autoencoders
|
|
357
|
+
Use built-in DenseAutoencoder or BatchNormAutoencoder:
|
|
358
|
+
- **Example:** [baseline_models.py](https://github.com/TsLu1s/segmentae/blob/main/examples/baseline.py)
|
|
359
|
+
- Shows built-in autoencoder usage
|
|
360
|
+
- Includes model summary and training visualization
|
|
361
|
+
|
|
362
|
+
### 3. Grid Search Optimization
|
|
363
|
+
Find optimal configuration automatically:
|
|
364
|
+
- **Example:** [optimizer_application.py](https://github.com/TsLu1s/SegmentAE/blob/main/examples/optimizer_application.py)
|
|
365
|
+
- Evaluates multiple autoencoders and clustering configs
|
|
366
|
+
- Multiple clustering algorithms
|
|
367
|
+
- Generates performance leaderboard
|
|
368
|
+
|
|
369
|
+
### 4. Unlabeled Data Detection
|
|
370
|
+
Detect anomalies without ground truth labels:
|
|
371
|
+
- **Example:** [unlabeled_application.py](https://github.com/TsLu1s/SegmentAE/blob/main/examples/unlabeled_application.py)
|
|
372
|
+
- Shows reconstruction-only workflow
|
|
373
|
+
- Useful for production deployment
|
|
374
|
+
|
|
375
|
+
If you use SegmentAE in your research, please cite:
|
|
376
|
+
|
|
377
|
+
```bibtex
|
|
378
|
+
@software{segmentae2024,
|
|
379
|
+
author = {Luís Fernando Santos},
|
|
380
|
+
title = {SegmentAE: A Python Library for Anomaly Detection Optimization},
|
|
381
|
+
year = {2024},
|
|
382
|
+
publisher = {PyPI},
|
|
383
|
+
url = {https://pypi.org/project/segmentae/}
|
|
384
|
+
}
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
## License
|
|
388
|
+
|
|
389
|
+
Distributed under the MIT License. See [LICENSE](https://github.com/TsLu1s/SegmentAE/blob/main/LICENSE) for more information.
|
|
390
|
+
|
|
391
|
+
## Contact
|
|
392
|
+
|
|
393
|
+
Luis Santos - [LinkedIn](https://www.linkedin.com/in/luisfssantos98/)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
segmentae/__init__.py,sha256=GBlQU-q8-1PpiQYq9KJ_gqmvQxjjN7iL5J4BBjj_SSY,2135
|
|
2
|
+
segmentae/anomaly_detection.py,sha256=1191HWo0FfrHnYoKsSo69fsI1i8TssrJLqzYzhDFKSE,682
|
|
3
|
+
segmentae/autoencoders/__init__.py,sha256=RANceVS3TIYfaUCaUqaP4xZtw6gEyyGRK0Et4t7dZhs,467
|
|
4
|
+
segmentae/autoencoders/batch_norm.py,sha256=moH2Pj7ACWrjo4y0FnoUzRnX6EK0Bmm8kjvyn2XOoTE,11351
|
|
5
|
+
segmentae/autoencoders/dense.py,sha256=kjkXIhMD2BXFV_fJ74zT8-aT83Ab_TqunN7gY83J_78,12670
|
|
6
|
+
segmentae/autoencoders/ensemble.py,sha256=QqdAN4CfgelXr9ZanDIVJtQ766XMbB134kPru-EtNXk,10369
|
|
7
|
+
segmentae/clusters/__init__.py,sha256=EgJbS84o7Yv-eRRvbP3UdBvggGZBYHcTqxk1dOej9cU,474
|
|
8
|
+
segmentae/clusters/clustering.py,sha256=0Vypwi5ibN1gMJautH-VtBrDBUUM-XuVtdvPBw1AGZc,6190
|
|
9
|
+
segmentae/clusters/models.py,sha256=BiwjTSGpCEtXjWEg5_DAe0A_4_iIAjR0tXYCn3OikuI,14828
|
|
10
|
+
segmentae/clusters/registry.py,sha256=Ny_uKWfkVjgP7v-9xGN0BaYlr8N47wTDxN3nRiL5JHI,2397
|
|
11
|
+
segmentae/core/__init__.py,sha256=UcH45LiMd3n1igbYhkNAQU8rImBsM1YwTxHlTMjJ_jY,1401
|
|
12
|
+
segmentae/core/base.py,sha256=UWf-Y7z6mGiBOK2Be7R0Qi0sM0AajUswTcb_fuLDC_M,3010
|
|
13
|
+
segmentae/core/constants.py,sha256=N3k0ZrRTQXZR3VP45-dky6sKPyrTEhzz8BtFhlPLDnI,2611
|
|
14
|
+
segmentae/core/exceptions.py,sha256=RlaTYmGTHkvfwctry8bp0oV9-8cLV5ji0mPvyJBwsdA,2060
|
|
15
|
+
segmentae/core/types.py,sha256=pds5CjAqjoIHIXHo0VxXUnU9M66DSLvu6EMtl_n4xNs,1334
|
|
16
|
+
segmentae/data_sources/__init__.py,sha256=NHHYcrubqqgPYnFtpzoD9kE-memzSPJwHYb9LYglcSo,86
|
|
17
|
+
segmentae/data_sources/examples.py,sha256=n7-KgdIkekUYcF-eR09eip81VspaEFuYi-l8KRbZRJ0,6261
|
|
18
|
+
segmentae/metrics/__init__.py,sha256=8aeEsX83BuZWe4jg_aCsx4oWjmMCFYviyANSlNnhN1A,167
|
|
19
|
+
segmentae/metrics/performance_metrics.py,sha256=XahBtM-g8gmyy95n6nQ7iA2CZwXQKKSJzc1ycPG1rG8,3497
|
|
20
|
+
segmentae/optimization/__init__.py,sha256=vhKfas8VRbiK_Pv8jpelKWBRuUncaaERapdibCxUsKY,150
|
|
21
|
+
segmentae/optimization/optimizer.py,sha256=tdRxfd2VEXAxikCo7FbJb4fvlRRemZb94rPZ53J62xU,14206
|
|
22
|
+
segmentae/pipeline/__init__.py,sha256=R0ImzTI_fOpycXbiFo_Gl-ordqvAyfOpML_5dktDqrU,579
|
|
23
|
+
segmentae/pipeline/reconstruction.py,sha256=cCt6IOGzUcaQDX99dWRsVMnOrqekeMoqFCprsV1GOQw,6849
|
|
24
|
+
segmentae/pipeline/segmentae.py,sha256=Mrzy25rh-kSH9LnxZnotigat7Ky2pw1IIxmaaCoUF14,22807
|
|
25
|
+
segmentae/processing/__init__.py,sha256=R0ImzTI_fOpycXbiFo_Gl-ordqvAyfOpML_5dktDqrU,579
|
|
26
|
+
segmentae/processing/preprocessing.py,sha256=wcAGcYJTEfyGNO_JjlZ9UEYMniE22XKfl9h9x5yaeYQ,9015
|
|
27
|
+
segmentae/processing/simplifier.py,sha256=ZQP2dd8A_bECOpps2lByONzJ7Hh5IL9sRLLcFCBTScs,2820
|
|
28
|
+
segmentae/utils/__init__.py,sha256=uOqiuWVIXgslcqcoxI1hS8EhSLQzHFU07IleG1y-GlM,394
|
|
29
|
+
segmentae/utils/validation.py,sha256=1N-HRlM2wChMcpRhrZDa4J_I8WcDXWs8uD3MSIK-o0o,2781
|
|
30
|
+
segmentae-1.5.20.dist-info/licenses/LICENSE,sha256=6-FKIkKxpU5gZ4qEXyul9aZ0ju-Vp-iT0eN50oVY6cw,1108
|
|
31
|
+
segmentae-1.5.20.dist-info/METADATA,sha256=Oj7163xegMVTXTpUTsZ0m1o9BCM8nz_CM3SnMeseMMw,16117
|
|
32
|
+
segmentae-1.5.20.dist-info/WHEEL,sha256=lTU6B6eIfYoiQJTZNc-fyaR6BpL6ehTzU3xGYxn2n8k,91
|
|
33
|
+
segmentae-1.5.20.dist-info/top_level.txt,sha256=KiqzjD-Ec5yUZLjKLqZq_fH9MQuHrbkuSuHZOtL3ADk,10
|
|
34
|
+
segmentae-1.5.20.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Luís Fernando da Silva Santos
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
segmentae
|