pyod 2.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. pyod/__init__.py +7 -0
  2. pyod/models/__init__.py +26 -0
  3. pyod/models/abod.py +308 -0
  4. pyod/models/ae1svm.py +380 -0
  5. pyod/models/alad.py +494 -0
  6. pyod/models/anogan.py +444 -0
  7. pyod/models/auto_encoder.py +220 -0
  8. pyod/models/base.py +713 -0
  9. pyod/models/base_dl.py +435 -0
  10. pyod/models/cblof.py +332 -0
  11. pyod/models/cd.py +202 -0
  12. pyod/models/cof.py +215 -0
  13. pyod/models/combination.py +177 -0
  14. pyod/models/copod.py +287 -0
  15. pyod/models/deep_svdd.py +399 -0
  16. pyod/models/devnet.py +335 -0
  17. pyod/models/dif.py +456 -0
  18. pyod/models/ecod.py +295 -0
  19. pyod/models/feature_bagging.py +409 -0
  20. pyod/models/gaal_base.py +83 -0
  21. pyod/models/gmm.py +281 -0
  22. pyod/models/hbos.py +351 -0
  23. pyod/models/iforest.py +322 -0
  24. pyod/models/inne.py +252 -0
  25. pyod/models/kde.py +184 -0
  26. pyod/models/knn.py +277 -0
  27. pyod/models/kpca.py +393 -0
  28. pyod/models/lmdd.py +218 -0
  29. pyod/models/loci.py +246 -0
  30. pyod/models/loda.py +204 -0
  31. pyod/models/lof.py +225 -0
  32. pyod/models/lscp.py +408 -0
  33. pyod/models/lunar.py +368 -0
  34. pyod/models/mad.py +150 -0
  35. pyod/models/mcd.py +236 -0
  36. pyod/models/mo_gaal.py +287 -0
  37. pyod/models/ocsvm.py +230 -0
  38. pyod/models/pca.py +354 -0
  39. pyod/models/qmcd.py +156 -0
  40. pyod/models/rgraph.py +559 -0
  41. pyod/models/rod.py +450 -0
  42. pyod/models/sampling.py +192 -0
  43. pyod/models/sklearn_base.py +105 -0
  44. pyod/models/so_gaal.py +233 -0
  45. pyod/models/so_gaal_new.py +175 -0
  46. pyod/models/sod.py +199 -0
  47. pyod/models/sos.py +306 -0
  48. pyod/models/suod.py +267 -0
  49. pyod/models/thresholds.py +665 -0
  50. pyod/models/vae.py +339 -0
  51. pyod/models/xgbod.py +458 -0
  52. pyod/utils/__init__.py +28 -0
  53. pyod/utils/data.py +652 -0
  54. pyod/utils/example.py +201 -0
  55. pyod/utils/stat_models.py +251 -0
  56. pyod/utils/torch_utility.py +457 -0
  57. pyod/utils/utility.py +588 -0
  58. pyod/version.py +23 -0
  59. pyod-2.0.5.dist-info/METADATA +668 -0
  60. pyod-2.0.5.dist-info/RECORD +63 -0
  61. pyod-2.0.5.dist-info/WHEEL +5 -0
  62. pyod-2.0.5.dist-info/licenses/LICENSE +25 -0
  63. pyod-2.0.5.dist-info/top_level.txt +1 -0
pyod/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ from . import models
4
+ from . import utils
5
+ from .version import __version__
6
+
7
+ __all__ = ['models', 'utils', '__version__']
pyod/models/__init__.py ADDED
@@ -0,0 +1,26 @@
1
+ # -*- coding: utf-8 -*-
2
+ # from .abod import ABOD
3
+ # from .auto_encoder import AutoEncoder
4
+ # from .cblof import CBLOF
5
+ # from .combination import aom, moa, average, maximization
6
+ # from .feature_bagging import FeatureBagging
7
+ # from .hbos import HBOS
8
+ # from .iforest import IForest
9
+ # from .knn import KNN
10
+ # from .lof import LOF
11
+ # from .mcd import MCD
12
+ # from .ocsvm import OCSVM
13
+ # from .pca import PCA
14
+ #
15
+ # __all__ = ['ABOD',
16
+ # 'AutoEncoder',
17
+ # 'CBLOF',
18
+ # 'aom', 'moa', 'average', 'maximization',
19
+ # 'FeatureBagging',
20
+ # 'HBOS',
21
+ # 'IForest',
22
+ # 'KNN',
23
+ # 'LOF',
24
+ # 'MCD',
25
+ # 'OCSVM',
26
+ # 'PCA']
pyod/models/abod.py ADDED
@@ -0,0 +1,308 @@
1
+ # -*- coding: utf-8 -*-
2
+ """Angle-based Outlier Detector (ABOD)
3
+ """
4
+ # Author: Yue Zhao <yzhao062@gmail.com>
5
+ # License: BSD 2 clause
6
+
7
+
8
+ import warnings
9
+ from itertools import combinations
10
+
11
+ import numpy as np
12
+ from numba import njit
13
+ from sklearn.neighbors import KDTree
14
+ from sklearn.neighbors import NearestNeighbors
15
+ from sklearn.utils import check_array
16
+ from sklearn.utils.validation import check_is_fitted
17
+
18
+ from .base import BaseDetector
19
+ from ..utils.utility import check_parameter
20
+
21
+
22
@njit
def _wcos(curr_pt, a, b):  # pragma: no cover
    """Compute the distance-weighted cosine of the angle seen from a point.

    The difference vectors ``a - curr_pt`` and ``b - curr_pt`` are formed and
    their dot product is divided by the squared Euclidean norm of each of
    them::

        wcos = <a - p, b - p> / (|a - p|^2 * |b - p|^2)

    Because of the squared norms the result is a *weighted* cosine and is
    not confined to [-1, 1]. The function is compiled with numba's ``njit``.

    Parameters
    ----------
    curr_pt : numpy array
        The reference sample at which the angle is measured. Callers in
        this module pass a single row of the data matrix.

    a : numpy array
        First training sample, same shape as ``curr_pt``.

    b : numpy array
        Second training sample, same shape as ``curr_pt``.

    Returns
    -------
    wcos : float
        Weighted cosine between ``a - curr_pt`` and ``b - curr_pt``.
        The value is undefined (zero denominator) when ``a`` or ``b``
        coincides with ``curr_pt``.

    """
    vec_a = a - curr_pt
    vec_b = b - curr_pt

    sq_norm_a = np.linalg.norm(vec_a, 2) ** 2
    sq_norm_b = np.linalg.norm(vec_b, 2) ** 2

    # divide by one squared norm at a time, left to right
    return np.dot(vec_a, vec_b) / sq_norm_a / sq_norm_b
53
+
54
+
55
def _calculate_wocs(curr_pt, X, X_ind):
    """Calculate the variance of the weighted cosines of a point.

    For every unordered pair of rows of ``X`` selected by ``X_ind`` the
    weighted cosine
    wcos = (<a_curr, b_curr>/((|a_curr|*|b_curr|)^2)
    is evaluated with ``_wcos``; the variance of those values is returned.

    Parameters
    ----------
    curr_pt : numpy array
        The sample to be calculated. It is compared against rows
        ``X[i, :]`` and so is expected to be a single row of features.

    X : numpy array of shape (n_samples, n_features)
        The training dataset.

    X_ind : iterable of int
        The row indices of ``X`` that take part in the angle calculation.

    Returns
    -------
    cos_angle_var : float
        The variance of the weighted cosines. When no usable pair exists
        (fewer than two indices, or every pair contains a row identical to
        ``curr_pt``) the variance of an empty list is taken, which numpy
        reports as ``nan``.

    """
    wcos_list = []
    # Walk the index pairs lazily: building the full list first would need
    # memory quadratic in len(X_ind) for no benefit.
    for a_ind, b_ind in combinations(X_ind, 2):
        a = X[a_ind, :]
        b = X[b_ind, :]

        # skip if no angle can be formed (a zero-length difference vector
        # would also make the weighted cosine's denominator zero)
        if np.array_equal(a, curr_pt) or np.array_equal(b, curr_pt):
            continue
        # add the weighted cosine to the list
        wcos_list.append(_wcos(curr_pt, a, b))
    return np.var(wcos_list)
88
+
89
+
90
+ # noinspection PyPep8Naming
91
class ABOD(BaseDetector):
    """ABOD class for Angle-based Outlier Detection.
    For an observation, the variance of its weighted cosine scores to all
    neighbors could be viewed as the outlying score.
    See :cite:`kriegel2008angle` for details.

    Two versions of ABOD are supported:

    - Fast ABOD: use k nearest neighbors to approximate.
    - Original ABOD: consider all training points with high time complexity at
      O(n^3).

    Parameters
    ----------
    contamination : float in (0., 0.5), optional (default=0.1)
        The amount of contamination of the data set, i.e.
        the proportion of outliers in the data set. Used when fitting to
        define the threshold on the decision function.

    n_neighbors : int, optional (default=5)
        Number of neighbors to use by default for k neighbors queries.
        Only used by the 'fast' method. If it is not smaller than the
        number of training samples, ``fit`` overwrites it with
        ``n_samples - 1`` and emits a warning.

    method: str, optional (default='fast')
        Valid values for method are:

        - 'fast': fast ABOD. Only consider n_neighbors of training points
        - 'default': original ABOD with all training points, which could be
          slow

    Attributes
    ----------
    decision_scores_ : numpy array of shape (n_samples,)
        The outlier scores of the training data.
        The higher, the more abnormal. Outliers tend to have higher
        scores. This value is available once the detector is
        fitted.

    threshold_ : float
        The threshold is based on ``contamination``. It is the
        ``n_samples * contamination`` most abnormal samples in
        ``decision_scores_``. The threshold is calculated for generating
        binary outlier labels.

    labels_ : int, either 0 or 1
        The binary labels of the training data. 0 stands for inliers
        and 1 for outliers/anomalies. It is generated by applying
        ``threshold_`` on ``decision_scores_``.
    """

    def __init__(self, contamination=0.1, n_neighbors=5, method='fast'):
        super(ABOD, self).__init__(contamination=contamination)
        self.method = method
        self.n_neighbors = n_neighbors

    def fit(self, X, y=None):
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.

        Raises
        ------
        ValueError
            If ``method`` is neither 'fast' nor 'default'.
        """
        # validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        self.X_train_ = X
        self.n_train_ = X.shape[0]
        # filled in place, one row per training sample, by the _fit_* helpers
        self.decision_scores_ = np.zeros([self.n_train_, 1])

        if self.method == 'fast':
            self._fit_fast()
        elif self.method == 'default':
            self._fit_default()
        else:
            # a single formatted message; passing several arguments to
            # ValueError would render the message as a tuple
            raise ValueError(
                "{0!r} is not a valid method".format(self.method))

        # flip the scores: a small variance of weighted cosines has to map
        # to a large outlier score
        self.decision_scores_ = self.decision_scores_.ravel() * -1
        # inherited from BaseDetector; presumably derives threshold_ and
        # labels_ from decision_scores_ -- confirm in base.py
        self._process_decision_scores()
        return self

    def _fit_default(self):
        """Default ABOD method. Use all training points with high complexity
        O(n^3). For internal use only.

        Writes the (not yet negated) score of every training sample into
        ``decision_scores_`` and returns ``self``.
        """
        for i in range(self.n_train_):
            curr_pt = self.X_train_[i, :]

            # get the index pairs of the neighbors, remove itself from index
            X_ind = list(range(0, self.n_train_))
            X_ind.remove(i)

            self.decision_scores_[i, 0] = _calculate_wocs(curr_pt,
                                                          self.X_train_,
                                                          X_ind)
        return self

    def _fit_fast(self):
        """Fast ABOD method. Only use n_neighbors for angle calculation.
        Internal use only

        Builds ``tree_`` (used later to score new samples), writes the (not
        yet negated) score of every training sample into
        ``decision_scores_`` and returns ``self``. May overwrite
        ``n_neighbors`` when it is too large for the training set.
        """

        # make sure the n_neighbors is in the range
        if self.n_neighbors >= self.n_train_:
            self.n_neighbors = self.n_train_ - 1
            # report the value actually in effect after clamping
            warnings.warn("n_neighbors is set to the number of "
                          "training points minus 1: {0}".format(
                              self.n_neighbors))

        # range validation by the project helper (presumably raises when
        # the value is outside [1, n_train_] -- confirm in utils.utility)
        check_parameter(self.n_neighbors, 1, self.n_train_,
                        include_left=True, include_right=True)

        # kept on the instance for _decision_function_fast
        self.tree_ = KDTree(self.X_train_)

        neigh = NearestNeighbors(n_neighbors=self.n_neighbors)
        neigh.fit(self.X_train_)
        # no query set is passed, so the neighbors of the fitted points
        # themselves are returned
        ind_arr = neigh.kneighbors(n_neighbors=self.n_neighbors,
                                   return_distance=False)

        for i in range(self.n_train_):
            curr_pt = self.X_train_[i, :]
            X_ind = ind_arr[i, :]
            self.decision_scores_[i, 0] = _calculate_wocs(curr_pt,
                                                          self.X_train_,
                                                          X_ind)
        return self

    # noinspection PyPep8Naming
    def decision_function(self, X):
        """Predict raw anomaly score of X using the fitted detector.

        The anomaly score of an input sample is computed based on different
        detector algorithms. For consistency, outliers are assigned with
        larger anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The samples to score.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """

        check_is_fitted(self, ['X_train_', 'n_train_', 'decision_scores_',
                               'threshold_', 'labels_'])
        X = check_array(X)

        if self.method == 'fast':  # fast ABOD
            # outliers have higher outlier scores
            return self._decision_function_fast(X) * -1
        else:  # default ABOD
            return self._decision_function_default(X) * -1

    def _decision_function_default(self, X):
        """Internal method for predicting outlier scores using default ABOD.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The samples to score.

        Returns
        -------
        pred_score : array, shape (n_samples,)
            The variance of weighted cosines of each sample with respect
            to all training points (not yet negated).

        """
        # initialize the output score
        pred_score = np.zeros([X.shape[0], 1])

        for i in range(X.shape[0]):
            curr_pt = X[i, :]
            # get the index pairs of the neighbors
            X_ind = list(range(0, self.n_train_))
            pred_score[i, :] = _calculate_wocs(curr_pt, self.X_train_, X_ind)

        return pred_score.ravel()

    def _decision_function_fast(self, X):
        """Internal method for predicting outlier scores using Fast ABOD.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The samples to score.

        Returns
        -------
        pred_score : array, shape (n_samples,)
            The variance of weighted cosines of each sample with respect
            to its ``n_neighbors`` nearest training points (not yet
            negated).

        """

        check_is_fitted(self, ['tree_'])
        # initialize the output score
        pred_score = np.zeros([X.shape[0], 1])

        # get the indexes of the X's k nearest training points
        _, ind_arr = self.tree_.query(X, k=self.n_neighbors)

        for i in range(X.shape[0]):
            curr_pt = X[i, :]
            X_ind = ind_arr[i, :]
            pred_score[i, :] = _calculate_wocs(curr_pt, self.X_train_, X_ind)

        return pred_score.ravel()