pyod 2.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyod/__init__.py +7 -0
- pyod/models/__init__.py +26 -0
- pyod/models/abod.py +308 -0
- pyod/models/ae1svm.py +380 -0
- pyod/models/alad.py +494 -0
- pyod/models/anogan.py +444 -0
- pyod/models/auto_encoder.py +220 -0
- pyod/models/base.py +713 -0
- pyod/models/base_dl.py +435 -0
- pyod/models/cblof.py +332 -0
- pyod/models/cd.py +202 -0
- pyod/models/cof.py +215 -0
- pyod/models/combination.py +177 -0
- pyod/models/copod.py +287 -0
- pyod/models/deep_svdd.py +399 -0
- pyod/models/devnet.py +335 -0
- pyod/models/dif.py +456 -0
- pyod/models/ecod.py +295 -0
- pyod/models/feature_bagging.py +409 -0
- pyod/models/gaal_base.py +83 -0
- pyod/models/gmm.py +281 -0
- pyod/models/hbos.py +351 -0
- pyod/models/iforest.py +322 -0
- pyod/models/inne.py +252 -0
- pyod/models/kde.py +184 -0
- pyod/models/knn.py +277 -0
- pyod/models/kpca.py +393 -0
- pyod/models/lmdd.py +218 -0
- pyod/models/loci.py +246 -0
- pyod/models/loda.py +204 -0
- pyod/models/lof.py +225 -0
- pyod/models/lscp.py +408 -0
- pyod/models/lunar.py +368 -0
- pyod/models/mad.py +150 -0
- pyod/models/mcd.py +236 -0
- pyod/models/mo_gaal.py +287 -0
- pyod/models/ocsvm.py +230 -0
- pyod/models/pca.py +354 -0
- pyod/models/qmcd.py +156 -0
- pyod/models/rgraph.py +559 -0
- pyod/models/rod.py +450 -0
- pyod/models/sampling.py +192 -0
- pyod/models/sklearn_base.py +105 -0
- pyod/models/so_gaal.py +233 -0
- pyod/models/so_gaal_new.py +175 -0
- pyod/models/sod.py +199 -0
- pyod/models/sos.py +306 -0
- pyod/models/suod.py +267 -0
- pyod/models/thresholds.py +665 -0
- pyod/models/vae.py +339 -0
- pyod/models/xgbod.py +458 -0
- pyod/utils/__init__.py +28 -0
- pyod/utils/data.py +652 -0
- pyod/utils/example.py +201 -0
- pyod/utils/stat_models.py +251 -0
- pyod/utils/torch_utility.py +457 -0
- pyod/utils/utility.py +588 -0
- pyod/version.py +23 -0
- pyod-2.0.5.dist-info/METADATA +668 -0
- pyod-2.0.5.dist-info/RECORD +63 -0
- pyod-2.0.5.dist-info/WHEEL +5 -0
- pyod-2.0.5.dist-info/licenses/LICENSE +25 -0
- pyod-2.0.5.dist-info/top_level.txt +1 -0
pyod/__init__.py
ADDED
pyod/models/__init__.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# from .abod import ABOD
|
|
3
|
+
# from .auto_encoder import AutoEncoder
|
|
4
|
+
# from .cblof import CBLOF
|
|
5
|
+
# from .combination import aom, moa, average, maximization
|
|
6
|
+
# from .feature_bagging import FeatureBagging
|
|
7
|
+
# from .hbos import HBOS
|
|
8
|
+
# from .iforest import IForest
|
|
9
|
+
# from .knn import KNN
|
|
10
|
+
# from .lof import LOF
|
|
11
|
+
# from .mcd import MCD
|
|
12
|
+
# from .ocsvm import OCSVM
|
|
13
|
+
# from .pca import PCA
|
|
14
|
+
#
|
|
15
|
+
# __all__ = ['ABOD',
|
|
16
|
+
# 'AutoEncoder',
|
|
17
|
+
# 'CBLOF',
|
|
18
|
+
# 'aom', 'moa', 'average', 'maximization',
|
|
19
|
+
# 'FeatureBagging',
|
|
20
|
+
# 'HBOS',
|
|
21
|
+
# 'IForest',
|
|
22
|
+
# 'KNN',
|
|
23
|
+
# 'LOF',
|
|
24
|
+
# 'MCD',
|
|
25
|
+
# 'OCSVM',
|
|
26
|
+
# 'PCA']
|
pyod/models/abod.py
ADDED
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""Angle-based Outlier Detector (ABOD)
|
|
3
|
+
"""
|
|
4
|
+
# Author: Yue Zhao <yzhao062@gmail.com>
|
|
5
|
+
# License: BSD 2 clause
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
import warnings
|
|
9
|
+
from itertools import combinations
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
from numba import njit
|
|
13
|
+
from sklearn.neighbors import KDTree
|
|
14
|
+
from sklearn.neighbors import NearestNeighbors
|
|
15
|
+
from sklearn.utils import check_array
|
|
16
|
+
from sklearn.utils.validation import check_is_fitted
|
|
17
|
+
|
|
18
|
+
from .base import BaseDetector
|
|
19
|
+
from ..utils.utility import check_parameter
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@njit
def _wcos(curr_pt, a, b):  # pragma: no cover
    """Compute the weighted cosine of the angle at ``curr_pt`` spanned by
    two training samples, using numba-optimized code.

    The plain cosine numerator is divided by the squared norms of both
    difference vectors, i.e. wcos = <a-c, b-c> / (|a-c|^2 * |b-c|^2),
    so nearby neighbors get a larger weight.

    Parameters
    ----------
    curr_pt : numpy array of shape (n_samples, n_features)
        Current sample to be calculated.

    a : numpy array of shape (n_samples, n_features)
        Training sample a.

    b : numpy array of shape (n_samples, n_features)
        Training sample b.

    Returns
    -------
    wcos : float
        Weighted cosine similarity between a-curr_pt and b-curr_pt.
    """
    vec_a = a - curr_pt
    vec_b = b - curr_pt

    # wcos = (<vec_a, vec_b>/((|vec_a|*|vec_b|)^2)
    # keep the two sequential divisions (not one combined denominator) so
    # floating-point results match the historical implementation exactly
    weighted = np.dot(vec_a, vec_b)
    weighted = weighted / (np.linalg.norm(vec_a, 2) ** 2)
    weighted = weighted / (np.linalg.norm(vec_b, 2) ** 2)
    return weighted
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _calculate_wocs(curr_pt, X, X_ind):
    """Calculate the variance of weighted cosine scores of a point.
    wcos = (<a_curr, b_curr>/((|a_curr|*|b_curr|)^2)

    Parameters
    ----------
    curr_pt : numpy array, shape (1, n_features)
        The sample to be calculated.

    X : numpy array of shape (n_samples, n_features)
        The training dataset.

    X_ind : list
        The valid index of the training data.

    Returns
    -------
    cos_angle_var : float
        The variance of cosine angle

    """
    wcos_list = []
    # every unordered pair of valid indices forms one angle at curr_pt;
    # iterate the combinations lazily instead of materializing a list,
    # and drop the previously unused enumerate index
    for a_ind, b_ind in combinations(X_ind, 2):
        a = X[a_ind, :]
        b = X[b_ind, :]

        # skip if no angle can be formed (a or b coincides with curr_pt)
        if np.array_equal(a, curr_pt) or np.array_equal(b, curr_pt):
            continue
        # add the weighted cosine to the list
        wcos_list.append(_wcos(curr_pt, a, b))
    return np.var(wcos_list)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# noinspection PyPep8Naming
class ABOD(BaseDetector):
    """ABOD class for Angle-base Outlier Detection.
    For an observation, the variance of its weighted cosine scores to all
    neighbors could be viewed as the outlying score.
    See :cite:`kriegel2008angle` for details.

    Two version of ABOD are supported:

    - Fast ABOD: use k nearest neighbors to approximate.
    - Original ABOD: consider all training points with high time complexity at
      O(n^3).

    Parameters
    ----------
    contamination : float in (0., 0.5), optional (default=0.1)
        The amount of contamination of the data set, i.e.
        the proportion of outliers in the data set. Used when fitting to
        define the threshold on the decision function.

    n_neighbors : int, optional (default=5)
        Number of neighbors to use by default for k neighbors queries.

    method: str, optional (default='fast')
        Valid values for metric are:

        - 'fast': fast ABOD. Only consider n_neighbors of training points
        - 'default': original ABOD with all training points, which could be
          slow

    Attributes
    ----------
    decision_scores_ : numpy array of shape (n_samples,)
        The outlier scores of the training data.
        The higher, the more abnormal. Outliers tend to have higher
        scores. This value is available once the detector is
        fitted.

    threshold_ : float
        The threshold is based on ``contamination``. It is the
        ``n_samples * contamination`` most abnormal samples in
        ``decision_scores_``. The threshold is calculated for generating
        binary outlier labels.

    labels_ : int, either 0 or 1
        The binary labels of the training data. 0 stands for inliers
        and 1 for outliers/anomalies. It is generated by applying
        ``threshold_`` on ``decision_scores_``.
    """

    def __init__(self, contamination=0.1, n_neighbors=5, method='fast'):
        super(ABOD, self).__init__(contamination=contamination)
        self.method = method
        self.n_neighbors = n_neighbors

    def fit(self, X, y=None):
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        # validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        # keep the training data: both scoring paths need it for the
        # angle (weighted-cosine) calculations at predict time
        self.X_train_ = X
        self.n_train_ = X.shape[0]
        self.decision_scores_ = np.zeros([self.n_train_, 1])

        # dispatch on the configured variant; both variants fill
        # decision_scores_ in place
        if self.method == 'fast':
            self._fit_fast()
        elif self.method == 'default':
            self._fit_default()
        else:
            raise ValueError(self.method, "is not a valid method")

        # flip the scores so that larger values correspond to outliers
        # (mirrors the * -1 applied in decision_function)
        self.decision_scores_ = self.decision_scores_.ravel() * -1
        self._process_decision_scores()
        return self

    def _fit_default(self):
        """Default ABOD method. Use all training points with high complexity
        O(n^3). For internal use only.
        """
        for i in range(self.n_train_):
            curr_pt = self.X_train_[i, :]

            # get the index pairs of the neighbors, remove itself from index
            X_ind = list(range(0, self.n_train_))
            X_ind.remove(i)

            self.decision_scores_[i, 0] = _calculate_wocs(curr_pt,
                                                          self.X_train_,
                                                          X_ind)
        return self

    def _fit_fast(self):
        """Fast ABOD method. Only use n_neighbors for angle calculation.
        Internal use only
        """

        # make sure the n_neighbors is in the range
        if self.n_neighbors >= self.n_train_:
            self.n_neighbors = self.n_train_ - 1
            # NOTE(review): the message reports n_train_, although
            # n_neighbors was just set to n_train_ - 1 — confirm intent
            warnings.warn("n_neighbors is set to the number of "
                          "training points minus 1: {0}".format(self.n_train_))

        check_parameter(self.n_neighbors, 1, self.n_train_,
                        include_left=True, include_right=True)

        # KDTree is kept for querying neighbors of unseen points in
        # _decision_function_fast
        self.tree_ = KDTree(self.X_train_)

        # for the training points themselves, kneighbors() without X
        # excludes each point from its own neighbor list
        neigh = NearestNeighbors(n_neighbors=self.n_neighbors)
        neigh.fit(self.X_train_)
        ind_arr = neigh.kneighbors(n_neighbors=self.n_neighbors,
                                   return_distance=False)

        for i in range(self.n_train_):
            curr_pt = self.X_train_[i, :]
            X_ind = ind_arr[i, :]
            self.decision_scores_[i, 0] = _calculate_wocs(curr_pt,
                                                          self.X_train_,
                                                          X_ind)
        return self

    # noinspection PyPep8Naming
    def decision_function(self, X):
        """Predict raw anomaly score of X using the fitted detector.

        The anomaly score of an input sample is computed based on different
        detector algorithms. For consistency, outliers are assigned with
        larger anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only
            if they are supported by the base estimator.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """

        check_is_fitted(self, ['X_train_', 'n_train_', 'decision_scores_',
                               'threshold_', 'labels_'])
        X = check_array(X)

        if self.method == 'fast':  # fast ABOD
            # outliers have higher outlier scores
            return self._decision_function_fast(X) * -1
        else:  # default ABOD
            return self._decision_function_default(X) * -1

    def _decision_function_default(self, X):
        """Internal method for predicting outlier scores using default ABOD.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The training input samples.

        Returns
        -------
        pred_score : array, shape (n_samples,)
            The anomaly score of the input samples.

        """
        # initialize the output score
        pred_score = np.zeros([X.shape[0], 1])

        for i in range(X.shape[0]):
            curr_pt = X[i, :]
            # get the index pairs of the neighbors; all training points
            # are used (unseen points are never in X_train_, so no
            # self-exclusion is needed here)
            X_ind = list(range(0, self.n_train_))
            pred_score[i, :] = _calculate_wocs(curr_pt, self.X_train_, X_ind)

        return pred_score.ravel()

    def _decision_function_fast(self, X):
        """Internal method for predicting outlier scores using Fast ABOD.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The training input samples.

        Returns
        -------
        pred_score : array, shape (n_samples,)
            The anomaly score of the input samples.

        """

        check_is_fitted(self, ['tree_'])
        # initialize the output score
        pred_score = np.zeros([X.shape[0], 1])

        # get the indexes of the X's k nearest training points
        _, ind_arr = self.tree_.query(X, k=self.n_neighbors)

        for i in range(X.shape[0]):
            curr_pt = X[i, :]
            X_ind = ind_arr[i, :]
            pred_score[i, :] = _calculate_wocs(curr_pt, self.X_train_, X_ind)

        return pred_score.ravel()
|