PyNomaly 0.3.3__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 1.1
2
2
  Name: PyNomaly
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: A Python 3 implementation of LoOP: Local Outlier Probabilities, a local density based outlier detection method providing an outlier score in the range of [0,1].
5
5
  Home-page: https://github.com/vc1492a/PyNomaly
6
6
  Author: Valentino Constantinou
7
7
  Author-email: vc@valentino.io
8
8
  License: Apache License, Version 2.0
9
- Download-URL: https://github.com/vc1492a/PyNomaly/archive/0.3.3.tar.gz
9
+ Download-URL: https://github.com/vc1492a/PyNomaly/archive/0.3.4.tar.gz
10
10
  Description: UNKNOWN
11
11
  Keywords: outlier,anomaly,detection,machine,learning,probability
12
12
  Platform: UNKNOWN
@@ -10,13 +10,12 @@ try:
10
10
  except ImportError:
11
11
  pass
12
12
 
13
- __author__ = 'Valentino Constantinou'
14
- __version__ = '0.3.2'
15
- __license__ = 'Apache License, Version 2.0'
13
+ __author__ = "Valentino Constantinou"
14
+ __version__ = "0.3.4"
15
+ __license__ = "Apache License, Version 2.0"
16
16
 
17
17
 
18
18
  class Utils:
19
-
20
19
  @staticmethod
21
20
  def emit_progress_bar(progress: str, index: int, total: int) -> str:
22
21
  """
@@ -55,7 +54,7 @@ class LocalOutlierProbability(object):
55
54
  :param cluster_labels: a numpy array of cluster assignments w.r.t. each
56
55
  sample (optional, default None)
57
56
  :return:
58
- """"""
57
+ """ """
59
58
 
60
59
  Based on the work of Kriegel, Kröger, Schubert, and Zimek (2009) in LoOP:
61
60
  Local Outlier Probabilities.
@@ -93,7 +92,7 @@ class LocalOutlierProbability(object):
93
92
  """
94
93
 
95
94
  @staticmethod
96
- def _data(obj: Union['pd.DataFrame', np.ndarray]) -> np.ndarray:
95
+ def _data(obj: Union["pd.DataFrame", np.ndarray]) -> np.ndarray:
97
96
  """
98
97
  Validates the input data to ensure it is either a Pandas DataFrame
99
98
  or Numpy array.
@@ -101,24 +100,25 @@ class LocalOutlierProbability(object):
101
100
  :return: a vector of values to be used in calculating the local
102
101
  outlier probability.
103
102
  """
104
- if obj.__class__.__name__ == 'DataFrame':
103
+ if obj.__class__.__name__ == "DataFrame":
105
104
  points_vector = obj.values
106
105
  return points_vector
107
- elif obj.__class__.__name__ == 'ndarray':
106
+ elif obj.__class__.__name__ == "ndarray":
108
107
  points_vector = obj
109
108
  return points_vector
110
109
  else:
111
110
  warnings.warn(
112
111
  "Provided data or distance matrix must be in ndarray "
113
112
  "or DataFrame.",
114
- UserWarning)
113
+ UserWarning,
114
+ )
115
115
  if isinstance(obj, list):
116
116
  points_vector = np.array(obj)
117
117
  return points_vector
118
118
  points_vector = np.array([obj])
119
119
  return points_vector
120
120
 
121
- def _inputs(self, obj: 'LocalOutlierProbability'):
121
+ def _inputs(self, obj: "LocalOutlierProbability"):
122
122
  """
123
123
  Validates the inputs provided during initialization to ensure
124
124
  that the needed objects are provided.
@@ -134,35 +134,43 @@ class LocalOutlierProbability(object):
134
134
  elif all(v is not None for v in [obj.data, obj.distance_matrix]):
135
135
  warnings.warn(
136
136
  "Only one of the following may be provided: data or a "
137
- "distance matrix (not both).", UserWarning
137
+ "distance matrix (not both).",
138
+ UserWarning,
138
139
  )
139
140
  return False
140
141
  if obj.data is not None:
141
142
  points_vector = self._data(obj.data)
142
143
  return points_vector, obj.distance_matrix, obj.neighbor_matrix
143
- if all(matrix is not None for matrix in [obj.neighbor_matrix,
144
- obj.distance_matrix]):
144
+ if all(
145
+ matrix is not None
146
+ for matrix in [obj.neighbor_matrix, obj.distance_matrix]
147
+ ):
145
148
  dist_vector = self._data(obj.distance_matrix)
146
149
  neigh_vector = self._data(obj.neighbor_matrix)
147
150
  else:
148
151
  warnings.warn(
149
152
  "A neighbor index matrix and distance matrix must both be "
150
- "provided when not using raw input data.", UserWarning
153
+ "provided when not using raw input data.",
154
+ UserWarning,
151
155
  )
152
156
  return False
153
157
  if obj.distance_matrix.shape != obj.neighbor_matrix.shape:
154
158
  warnings.warn(
155
159
  "The shape of the distance and neighbor "
156
- "index matrices must match.", UserWarning
160
+ "index matrices must match.",
161
+ UserWarning,
157
162
  )
158
163
  return False
159
- elif (obj.distance_matrix.shape[1] != obj.n_neighbors) \
160
- or (obj.neighbor_matrix.shape[1] !=
161
- obj.n_neighbors):
162
- warnings.warn("The shape of the distance or "
163
- "neighbor index matrix does not "
164
- "match the number of neighbors "
165
- "specified.", UserWarning)
164
+ elif (obj.distance_matrix.shape[1] != obj.n_neighbors) or (
165
+ obj.neighbor_matrix.shape[1] != obj.n_neighbors
166
+ ):
167
+ warnings.warn(
168
+ "The shape of the distance or "
169
+ "neighbor index matrix does not "
170
+ "match the number of neighbors "
171
+ "specified.",
172
+ UserWarning,
173
+ )
166
174
  return False
167
175
  return obj.data, dist_vector, neigh_vector
168
176
 
@@ -184,7 +192,8 @@ class LocalOutlierProbability(object):
184
192
  "cluster. Specify a number of neighbors smaller than "
185
193
  "the smallest cluster size (observations in smallest "
186
194
  "cluster minus one).",
187
- UserWarning)
195
+ UserWarning,
196
+ )
188
197
  return False
189
198
  return True
190
199
 
@@ -199,17 +208,19 @@ class LocalOutlierProbability(object):
199
208
  """
200
209
  if not obj.n_neighbors > 0:
201
210
  obj.n_neighbors = 10
202
- warnings.warn("n_neighbors must be greater than 0."
203
- " Fit with " + str(obj.n_neighbors) +
204
- " instead.",
205
- UserWarning)
211
+ warnings.warn(
212
+ "n_neighbors must be greater than 0."
213
+ " Fit with " + str(obj.n_neighbors) + " instead.",
214
+ UserWarning,
215
+ )
206
216
  return False
207
217
  elif obj.n_neighbors >= obj._n_observations():
208
218
  obj.n_neighbors = obj._n_observations() - 1
209
219
  warnings.warn(
210
220
  "n_neighbors must be less than the number of observations."
211
221
  " Fit with " + str(obj.n_neighbors) + " instead.",
212
- UserWarning)
222
+ UserWarning,
223
+ )
213
224
  return True
214
225
 
215
226
  @staticmethod
@@ -222,8 +233,8 @@ class LocalOutlierProbability(object):
222
233
  """
223
234
  if obj.extent not in [1, 2, 3]:
224
235
  warnings.warn(
225
- "extent parameter (lambda) must be 1, 2, or 3.",
226
- UserWarning)
236
+ "extent parameter (lambda) must be 1, 2, or 3.", UserWarning
237
+ )
227
238
  return False
228
239
  return True
229
240
 
@@ -237,8 +248,8 @@ class LocalOutlierProbability(object):
237
248
  """
238
249
  if np.any(np.isnan(obj.data)):
239
250
  warnings.warn(
240
- "Method does not support missing values in input data.",
241
- UserWarning)
251
+ "Method does not support missing values in input data.", UserWarning
252
+ )
242
253
  return False
243
254
  return True
244
255
 
@@ -254,7 +265,8 @@ class LocalOutlierProbability(object):
254
265
  warnings.warn(
255
266
  "Must fit on historical data by calling fit() prior to "
256
267
  "calling stream(x).",
257
- UserWarning)
268
+ UserWarning,
269
+ )
258
270
  return False
259
271
  return True
260
272
 
@@ -272,7 +284,8 @@ class LocalOutlierProbability(object):
272
284
  warnings.warn(
273
285
  "Stream approach does not support clustered data. "
274
286
  "Automatically refit using single cluster of points.",
275
- UserWarning)
287
+ UserWarning,
288
+ )
276
289
  return False
277
290
  return True
278
291
 
@@ -294,43 +307,35 @@ class LocalOutlierProbability(object):
294
307
  assert len(types) == f.__code__.co_argcount
295
308
 
296
309
  def new_f(*args, **kwds):
297
- for (a, t) in zip(args, types):
298
- if type(a).__name__ == 'DataFrame':
310
+ for a, t in zip(args, types):
311
+ if type(a).__name__ == "DataFrame":
299
312
  a = np.array(a)
300
313
  if isinstance(a, t) is False:
301
- warnings.warn("Argument %r is not of type %s" % (a, t),
302
- UserWarning)
314
+ warnings.warn(
315
+ "Argument %r is not of type %s" % (a, t), UserWarning
316
+ )
303
317
  opt_types = {
304
- 'distance_matrix': {
305
- 'type': types[2]
306
- },
307
- 'neighbor_matrix': {
308
- 'type': types[3]
309
- },
310
- 'extent': {
311
- 'type': types[4]
312
- },
313
- 'n_neighbors': {
314
- 'type': types[5]
315
- },
316
- 'cluster_labels': {
317
- 'type': types[6]
318
- },
319
- 'use_numba': {
320
- 'type': types[7]
321
- },
322
- 'progress_bar': {
323
- 'type': types[8]
324
- }
318
+ "distance_matrix": {"type": types[2]},
319
+ "neighbor_matrix": {"type": types[3]},
320
+ "extent": {"type": types[4]},
321
+ "n_neighbors": {"type": types[5]},
322
+ "cluster_labels": {"type": types[6]},
323
+ "use_numba": {"type": types[7]},
324
+ "progress_bar": {"type": types[8]},
325
325
  }
326
326
  for x in kwds:
327
- opt_types[x]['value'] = kwds[x]
327
+ opt_types[x]["value"] = kwds[x]
328
328
  for k in opt_types:
329
329
  try:
330
- if isinstance(opt_types[k]['value'],
331
- opt_types[k]['type']) is False:
332
- warnings.warn("Argument %r is not of type %s." % (
333
- k, opt_types[k]['type']), UserWarning)
330
+ if (
331
+ isinstance(opt_types[k]["value"], opt_types[k]["type"])
332
+ is False
333
+ ):
334
+ warnings.warn(
335
+ "Argument %r is not of type %s."
336
+ % (k, opt_types[k]["type"]),
337
+ UserWarning,
338
+ )
334
339
  except KeyError:
335
340
  pass
336
341
  return f(*args, **kwds)
@@ -340,11 +345,28 @@ class LocalOutlierProbability(object):
340
345
 
341
346
  return decorator
342
347
 
343
- @accepts(object, np.ndarray, np.ndarray, np.ndarray, (int, np.integer),
344
- (int, np.integer), list, bool, bool)
345
- def __init__(self, data=None, distance_matrix=None, neighbor_matrix=None,
346
- extent=3, n_neighbors=10, cluster_labels=None,
347
- use_numba=False, progress_bar=False) -> None:
348
+ @accepts(
349
+ object,
350
+ np.ndarray,
351
+ np.ndarray,
352
+ np.ndarray,
353
+ (int, np.integer),
354
+ (int, np.integer),
355
+ list,
356
+ bool,
357
+ bool,
358
+ )
359
+ def __init__(
360
+ self,
361
+ data=None,
362
+ distance_matrix=None,
363
+ neighbor_matrix=None,
364
+ extent=3,
365
+ n_neighbors=10,
366
+ cluster_labels=None,
367
+ use_numba=False,
368
+ progress_bar=False,
369
+ ) -> None:
348
370
  self.data = data
349
371
  self.distance_matrix = distance_matrix
350
372
  self.neighbor_matrix = neighbor_matrix
@@ -361,11 +383,11 @@ class LocalOutlierProbability(object):
361
383
  self.progress_bar = progress_bar
362
384
  self.is_fit = False
363
385
 
364
- if self.use_numba is True and 'numba' not in sys.modules:
386
+ if self.use_numba is True and "numba" not in sys.modules:
365
387
  self.use_numba = False
366
388
  warnings.warn(
367
- "Numba is not available, falling back to pure python mode.",
368
- UserWarning)
389
+ "Numba is not available, falling back to pure python mode.", UserWarning
390
+ )
369
391
 
370
392
  self.Validate()._inputs(self)
371
393
  self.Validate._extent(self)
@@ -375,15 +397,14 @@ class LocalOutlierProbability(object):
375
397
  """
376
398
 
377
399
  @staticmethod
378
- def _standard_distance(cardinality: float, sum_squared_distance: float) \
379
- -> float:
400
+ def _standard_distance(cardinality: float, sum_squared_distance: float) -> float:
380
401
  """
381
402
  Calculates the standard distance of an observation.
382
403
  :param cardinality: the cardinality of the input observation.
383
404
  :param sum_squared_distance: the sum squared distance between all
384
405
  neighbors of the input observation.
385
406
  :return: the standard distance.
386
- # """
407
+ #"""
387
408
  division_result = sum_squared_distance / cardinality
388
409
  st_dist = sqrt(division_result)
389
410
  return st_dist
@@ -400,8 +421,9 @@ class LocalOutlierProbability(object):
400
421
  return extent * standard_distance
401
422
 
402
423
  @staticmethod
403
- def _prob_outlier_factor(probabilistic_distance: np.ndarray, ev_prob_dist:
404
- np.ndarray) -> np.ndarray:
424
+ def _prob_outlier_factor(
425
+ probabilistic_distance: np.ndarray, ev_prob_dist: np.ndarray
426
+ ) -> np.ndarray:
405
427
  """
406
428
  Calculates the probabilistic outlier factor of an observation.
407
429
  :param probabilistic_distance: the probabilistic distance of the
@@ -412,14 +434,14 @@ class LocalOutlierProbability(object):
412
434
  if np.all(probabilistic_distance == ev_prob_dist):
413
435
  return np.zeros(probabilistic_distance.shape)
414
436
  else:
415
- ev_prob_dist[ev_prob_dist == 0.] = 1.e-8
416
- result = np.divide(probabilistic_distance, ev_prob_dist) - 1.
437
+ ev_prob_dist[ev_prob_dist == 0.0] = 1.0e-8
438
+ result = np.divide(probabilistic_distance, ev_prob_dist) - 1.0
417
439
  return result
418
440
 
419
441
  @staticmethod
420
- def _norm_prob_outlier_factor(extent: float,
421
- ev_probabilistic_outlier_factor: list) \
422
- -> list:
442
+ def _norm_prob_outlier_factor(
443
+ extent: float, ev_probabilistic_outlier_factor: list
444
+ ) -> list:
423
445
  """
424
446
  Calculates the normalized probabilistic outlier factor of an
425
447
  observation.
@@ -434,8 +456,9 @@ class LocalOutlierProbability(object):
434
456
  return npofs
435
457
 
436
458
  @staticmethod
437
- def _local_outlier_probability(plof_val: np.ndarray, nplof_val: np.ndarray) \
438
- -> np.ndarray:
459
+ def _local_outlier_probability(
460
+ plof_val: np.ndarray, nplof_val: np.ndarray
461
+ ) -> np.ndarray:
439
462
  """
440
463
  Calculates the local outlier probability of an observation.
441
464
  :param plof_val: the probabilistic outlier factor of the input
@@ -448,7 +471,7 @@ class LocalOutlierProbability(object):
448
471
  if np.all(plof_val == nplof_val):
449
472
  return np.zeros(plof_val.shape)
450
473
  else:
451
- return np.maximum(0, erf_vec(plof_val / (nplof_val * np.sqrt(2.))))
474
+ return np.maximum(0, erf_vec(plof_val / (nplof_val * np.sqrt(2.0))))
452
475
 
453
476
  def _n_observations(self) -> int:
454
477
  """
@@ -502,8 +525,9 @@ class LocalOutlierProbability(object):
502
525
  :return: the updated storage matrix that collects information on
503
526
  each observation.
504
527
  """
505
- for vec, cluster_id in zip(range(self.distance_matrix.shape[0]),
506
- self._cluster_labels()):
528
+ for vec, cluster_id in zip(
529
+ range(self.distance_matrix.shape[0]), self._cluster_labels()
530
+ ):
507
531
  data_store[vec][0] = cluster_id
508
532
  data_store[vec][1] = self.distance_matrix[vec]
509
533
  data_store[vec][2] = self.neighbor_matrix[vec]
@@ -511,10 +535,10 @@ class LocalOutlierProbability(object):
511
535
 
512
536
  @staticmethod
513
537
  def _compute_distance_and_neighbor_matrix(
514
- clust_points_vector: np.ndarray,
515
- indices: np.ndarray,
516
- distances: np.ndarray,
517
- indexes: np.ndarray
538
+ clust_points_vector: np.ndarray,
539
+ indices: np.ndarray,
540
+ distances: np.ndarray,
541
+ indexes: np.ndarray,
518
542
  ) -> Tuple[np.ndarray, np.ndarray, int]:
519
543
  """
520
544
  This helper method provides the heavy lifting for the _distances
@@ -522,27 +546,27 @@ class LocalOutlierProbability(object):
522
546
  written so that it can make full use of Numba's jit capabilities if
523
547
  desired.
524
548
  """
525
-
526
549
  for i in range(clust_points_vector.shape[0]):
527
550
  for j in range(i + 1, clust_points_vector.shape[0]):
528
- p = ((i,), (j,))
551
+ # Global index of the points
552
+ global_i = indices[0][i]
553
+ global_j = indices[0][j]
529
554
 
530
- diff = clust_points_vector[p[0]] - clust_points_vector[p[1]]
555
+ # Compute Euclidean distance
556
+ diff = clust_points_vector[i] - clust_points_vector[j]
531
557
  d = np.dot(diff, diff) ** 0.5
532
558
 
533
- idx = indices[0][p[0]]
534
- idx_max = distances[idx].argmax()
559
+ # Update distance and neighbor index for global_i
560
+ idx_max = distances[global_i].argmax()
561
+ if d < distances[global_i][idx_max]:
562
+ distances[global_i][idx_max] = d
563
+ indexes[global_i][idx_max] = global_j
535
564
 
536
- if d < distances[idx][idx_max]:
537
- distances[idx][idx_max] = d
538
- indexes[idx][idx_max] = p[1][0]
539
-
540
- idx = indices[0][p[1]]
541
- idx_max = distances[idx].argmax()
542
-
543
- if d < distances[idx][idx_max]:
544
- distances[idx][idx_max] = d
545
- indexes[idx][idx_max] = p[0][0]
565
+ # Update distance and neighbor index for global_j
566
+ idx_max = distances[global_j].argmax()
567
+ if d < distances[global_j][idx_max]:
568
+ distances[global_j][idx_max] = d
569
+ indexes[global_j][idx_max] = global_i
546
570
 
547
571
  yield distances, indexes, i
548
572
 
@@ -555,20 +579,21 @@ class LocalOutlierProbability(object):
555
579
  :return: the updated storage matrix that collects information on
556
580
  each observation.
557
581
  """
558
- distances = np.full([self._n_observations(), self.n_neighbors], 9e10,
559
- dtype=float)
560
- indexes = np.full([self._n_observations(), self.n_neighbors], 9e10,
561
- dtype=float)
582
+ distances = np.full(
583
+ [self._n_observations(), self.n_neighbors], 9e10, dtype=float
584
+ )
585
+ indexes = np.full([self._n_observations(), self.n_neighbors], 9e10, dtype=float)
562
586
  self.points_vector = self.Validate._data(self.data)
563
- compute = numba.jit(self._compute_distance_and_neighbor_matrix,
564
- cache=True) if self.use_numba else \
565
- self._compute_distance_and_neighbor_matrix
587
+ compute = (
588
+ numba.jit(self._compute_distance_and_neighbor_matrix, cache=True)
589
+ if self.use_numba
590
+ else self._compute_distance_and_neighbor_matrix
591
+ )
566
592
  progress = "="
567
593
  for cluster_id in set(self._cluster_labels()):
568
594
  indices = np.where(self._cluster_labels() == cluster_id)
569
595
  clust_points_vector = np.array(
570
- self.points_vector.take(indices, axis=0)[0],
571
- dtype=np.float64
596
+ self.points_vector.take(indices, axis=0)[0], dtype=np.float64
572
597
  )
573
598
  # a generator that yields an updated distance matrix on each loop
574
599
  for c in compute(clust_points_vector, indices, distances, indexes):
@@ -576,7 +601,8 @@ class LocalOutlierProbability(object):
576
601
  # update the progress bar
577
602
  if progress_bar is True:
578
603
  progress = Utils.emit_progress_bar(
579
- progress, i+1, clust_points_vector.shape[0])
604
+ progress, i + 1, clust_points_vector.shape[0]
605
+ )
580
606
 
581
607
  self.distance_matrix = distances
582
608
  self.neighbor_matrix = indexes
@@ -630,11 +656,10 @@ class LocalOutlierProbability(object):
630
656
  """
631
657
  prob_distances = []
632
658
  for i in range(data_store[:, 4].shape[0]):
633
- prob_distances.append(
634
- self._prob_distance(self.extent, data_store[:, 4][i]))
659
+ prob_distances.append(self._prob_distance(self.extent, data_store[:, 4][i]))
635
660
  return np.hstack((data_store, np.array([prob_distances]).T))
636
661
 
637
- def _prob_distances_ev(self, data_store: np.ndarray) -> np.ndarray:
662
+ def _prob_distances_ev(self, data_store) -> np.ndarray:
638
663
  """
639
664
  Calculates the expected value of the probabilistic distance for
640
665
  each observation in the input data with respect to the cluster the
@@ -648,19 +673,20 @@ class LocalOutlierProbability(object):
648
673
  for cluster_id in self.cluster_labels_u:
649
674
  indices = np.where(data_store[:, 0] == cluster_id)[0]
650
675
  for index in indices:
651
- nbrhood = data_store[index][2].astype(int)
652
- nbrhood_prob_distances = np.take(data_store[:, 5],
653
- nbrhood).astype(float)
676
+ # Global neighbor indices for the current point
677
+ nbrhood = data_store[index][2].astype(int) # Ensure global indices
678
+ nbrhood_prob_distances = np.take(data_store[:, 5], nbrhood).astype(
679
+ float
680
+ )
654
681
  nbrhood_prob_distances_nonan = nbrhood_prob_distances[
655
- np.logical_not(np.isnan(nbrhood_prob_distances))]
656
- prob_set_distance_ev[index] = \
657
- nbrhood_prob_distances_nonan.mean()
682
+ np.logical_not(np.isnan(nbrhood_prob_distances))
683
+ ]
684
+ prob_set_distance_ev[index] = nbrhood_prob_distances_nonan.mean()
685
+
658
686
  self.prob_distances_ev = prob_set_distance_ev
659
- data_store = np.hstack((data_store, prob_set_distance_ev))
660
- return data_store
687
+ return np.hstack((data_store, prob_set_distance_ev))
661
688
 
662
- def _prob_local_outlier_factors(self,
663
- data_store: np.ndarray) -> np.ndarray:
689
+ def _prob_local_outlier_factors(self, data_store: np.ndarray) -> np.ndarray:
664
690
  """
665
691
  Calculates the probabilistic local outlier factor for each
666
692
  observation in the input data.
@@ -670,13 +696,22 @@ class LocalOutlierProbability(object):
670
696
  each observation.
671
697
  """
672
698
  return np.hstack(
673
- (data_store,
674
- np.array([np.apply_along_axis(self._prob_outlier_factor, 0,
675
- data_store[:, 5],
676
- data_store[:, 6])]).T))
699
+ (
700
+ data_store,
701
+ np.array(
702
+ [
703
+ np.apply_along_axis(
704
+ self._prob_outlier_factor,
705
+ 0,
706
+ data_store[:, 5],
707
+ data_store[:, 6],
708
+ )
709
+ ]
710
+ ).T,
711
+ )
712
+ )
677
713
 
678
- def _prob_local_outlier_factors_ev(self,
679
- data_store: np.ndarray) -> np.ndarray:
714
+ def _prob_local_outlier_factors_ev(self, data_store: np.ndarray) -> np.ndarray:
680
715
  """
681
716
  Calculates the expected value of the probabilistic local outlier factor
682
717
  for each observation in the input data with respect to the cluster the
@@ -689,21 +724,31 @@ class LocalOutlierProbability(object):
689
724
  prob_local_outlier_factor_ev_dict = {}
690
725
  for cluster_id in self.cluster_labels_u:
691
726
  indices = np.where(data_store[:, 0] == cluster_id)
692
- prob_local_outlier_factors = np.take(data_store[:, 7],
693
- indices).astype(float)
694
- prob_local_outlier_factors_nonan = prob_local_outlier_factors[
695
- np.logical_not(np.isnan(prob_local_outlier_factors))]
696
- prob_local_outlier_factor_ev_dict[cluster_id] = (
697
- np.power(prob_local_outlier_factors_nonan, 2).sum() /
698
- float(prob_local_outlier_factors_nonan.size)
727
+ prob_local_outlier_factors = np.take(data_store[:, 7], indices).astype(
728
+ float
699
729
  )
730
+ prob_local_outlier_factors_nonan = prob_local_outlier_factors[
731
+ np.logical_not(np.isnan(prob_local_outlier_factors))
732
+ ]
733
+ prob_local_outlier_factor_ev_dict[cluster_id] = np.power(
734
+ prob_local_outlier_factors_nonan, 2
735
+ ).sum() / float(prob_local_outlier_factors_nonan.size)
700
736
  data_store = np.hstack(
701
- (data_store, np.array([[prob_local_outlier_factor_ev_dict[x] for x
702
- in data_store[:, 0].tolist()]]).T))
737
+ (
738
+ data_store,
739
+ np.array(
740
+ [
741
+ [
742
+ prob_local_outlier_factor_ev_dict[x]
743
+ for x in data_store[:, 0].tolist()
744
+ ]
745
+ ]
746
+ ).T,
747
+ )
748
+ )
703
749
  return data_store
704
750
 
705
- def _norm_prob_local_outlier_factors(self, data_store: np.ndarray) \
706
- -> np.ndarray:
751
+ def _norm_prob_local_outlier_factors(self, data_store: np.ndarray) -> np.ndarray:
707
752
  """
708
753
  Calculates the normalized probabilistic local outlier factor for each
709
754
  observation in the input data.
@@ -712,11 +757,20 @@ class LocalOutlierProbability(object):
712
757
  :return: the updated storage matrix that collects information on
713
758
  each observation.
714
759
  """
715
- return np.hstack((data_store, np.array([self._norm_prob_outlier_factor(
716
- self.extent, data_store[:, 8].tolist())]).T))
760
+ return np.hstack(
761
+ (
762
+ data_store,
763
+ np.array(
764
+ [
765
+ self._norm_prob_outlier_factor(
766
+ self.extent, data_store[:, 8].tolist()
767
+ )
768
+ ]
769
+ ).T,
770
+ )
771
+ )
717
772
 
718
- def _local_outlier_probabilities(self,
719
- data_store: np.ndarray) -> np.ndarray:
773
+ def _local_outlier_probabilities(self, data_store: np.ndarray) -> np.ndarray:
720
774
  """
721
775
  Calculates the local outlier probability for each observation in the
722
776
  input data.
@@ -726,17 +780,26 @@ class LocalOutlierProbability(object):
726
780
  each observation.
727
781
  """
728
782
  return np.hstack(
729
- (data_store,
730
- np.array([np.apply_along_axis(self._local_outlier_probability, 0,
731
- data_store[:, 7],
732
- data_store[:, 9])]).T))
783
+ (
784
+ data_store,
785
+ np.array(
786
+ [
787
+ np.apply_along_axis(
788
+ self._local_outlier_probability,
789
+ 0,
790
+ data_store[:, 7],
791
+ data_store[:, 9],
792
+ )
793
+ ]
794
+ ).T,
795
+ )
796
+ )
733
797
 
734
798
  """
735
799
  Public methods
736
800
  """
737
801
 
738
- def fit(self) -> 'LocalOutlierProbability':
739
-
802
+ def fit(self) -> "LocalOutlierProbability":
740
803
  """
741
804
  Calculates the local outlier probability for each observation in the
742
805
  input data according to the input parameters extent, n_neighbors, and
@@ -748,8 +811,7 @@ class LocalOutlierProbability(object):
748
811
  self.Validate._n_neighbors(self)
749
812
  if self.Validate._cluster_size(self) is False:
750
813
  sys.exit()
751
- if self.data is not None and self.Validate._missing_values(
752
- self) is False:
814
+ if self.data is not None and self.Validate._missing_values(self) is False:
753
815
  sys.exit()
754
816
 
755
817
  store = self._store()
@@ -773,7 +835,6 @@ class LocalOutlierProbability(object):
773
835
  return self
774
836
 
775
837
  def stream(self, x: np.ndarray) -> np.ndarray:
776
-
777
838
  """
778
839
  Calculates the local outlier probability for an individual sample
779
840
  according to the input parameters extent, n_neighbors, and
@@ -812,12 +873,12 @@ class LocalOutlierProbability(object):
812
873
  ssd = np.power(distances, 2).sum()
813
874
  std_dist = np.sqrt(np.divide(ssd, self.n_neighbors))
814
875
  prob_dist = self._prob_distance(self.extent, std_dist)
815
- plof = self._prob_outlier_factor(np.array(prob_dist),
816
- np.array(
817
- self.prob_distances_ev.mean())
818
- )
876
+ plof = self._prob_outlier_factor(
877
+ np.array(prob_dist), np.array(self.prob_distances_ev.mean())
878
+ )
819
879
  loop = self._local_outlier_probability(
820
- plof, self.norm_prob_local_outlier_factor)
880
+ plof, self.norm_prob_local_outlier_factor
881
+ )
821
882
 
822
883
  if orig_cluster_labels is not None:
823
884
  self.cluster_labels = orig_cluster_labels
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 1.1
2
2
  Name: PyNomaly
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: A Python 3 implementation of LoOP: Local Outlier Probabilities, a local density based outlier detection method providing an outlier score in the range of [0,1].
5
5
  Home-page: https://github.com/vc1492a/PyNomaly
6
6
  Author: Valentino Constantinou
7
7
  Author-email: vc@valentino.io
8
8
  License: Apache License, Version 2.0
9
- Download-URL: https://github.com/vc1492a/PyNomaly/archive/0.3.3.tar.gz
9
+ Download-URL: https://github.com/vc1492a/PyNomaly/archive/0.3.4.tar.gz
10
10
  Description: UNKNOWN
11
11
  Keywords: outlier,anomaly,detection,machine,learning,probability
12
12
  Platform: UNKNOWN
@@ -1,4 +1,3 @@
1
- setup.cfg
2
1
  setup.py
3
2
  PyNomaly/__init__.py
4
3
  PyNomaly/loop.py
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -3,14 +3,14 @@ from setuptools import setup
3
3
  setup(
4
4
  name='PyNomaly',
5
5
  packages=['PyNomaly'],
6
- version='0.3.3',
6
+ version='0.3.4',
7
7
  description='A Python 3 implementation of LoOP: Local Outlier '
8
8
  'Probabilities, a local density based outlier detection '
9
9
  'method providing an outlier score in the range of [0,1].',
10
10
  author='Valentino Constantinou',
11
11
  author_email='vc@valentino.io',
12
12
  url='https://github.com/vc1492a/PyNomaly',
13
- download_url='https://github.com/vc1492a/PyNomaly/archive/0.3.3.tar.gz',
13
+ download_url='https://github.com/vc1492a/PyNomaly/archive/0.3.4.tar.gz',
14
14
  keywords=['outlier', 'anomaly', 'detection', 'machine', 'learning',
15
15
  'probability'],
16
16
  classifiers=[],
PyNomaly-0.3.3/setup.cfg DELETED
@@ -1,7 +0,0 @@
1
- [metadata]
2
- description-file = README.md
3
-
4
- [egg_info]
5
- tag_build =
6
- tag_date = 0
7
-
File without changes