PyNomaly 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
PyNomaly/loop.py CHANGED
@@ -10,13 +10,12 @@ try:
10
10
  except ImportError:
11
11
  pass
12
12
 
13
- __author__ = 'Valentino Constantinou'
14
- __version__ = '0.3.2'
15
- __license__ = 'Apache License, Version 2.0'
13
+ __author__ = "Valentino Constantinou"
14
+ __version__ = "0.3.4"
15
+ __license__ = "Apache License, Version 2.0"
16
16
 
17
17
 
18
18
  class Utils:
19
-
20
19
  @staticmethod
21
20
  def emit_progress_bar(progress: str, index: int, total: int) -> str:
22
21
  """
@@ -32,7 +31,10 @@ class Utils:
32
31
 
33
32
  w, h = get_terminal_size()
34
33
  sys.stdout.write("\r")
35
- block_size = int(total / w)
34
+ if total < w:
35
+ block_size = int(w / total)
36
+ else:
37
+ block_size = int(total / w)
36
38
  if index % block_size == 0:
37
39
  progress += "="
38
40
  percent = index / total
@@ -52,7 +54,7 @@ class LocalOutlierProbability(object):
52
54
  :param cluster_labels: a numpy array of cluster assignments w.r.t. each
53
55
  sample (optional, default None)
54
56
  :return:
55
- """"""
57
+ """ """
56
58
 
57
59
  Based on the work of Kriegel, Kröger, Schubert, and Zimek (2009) in LoOP:
58
60
  Local Outlier Probabilities.
@@ -90,7 +92,7 @@ class LocalOutlierProbability(object):
90
92
  """
91
93
 
92
94
  @staticmethod
93
- def _data(obj: Union['pd.DataFrame', np.ndarray]) -> np.ndarray:
95
+ def _data(obj: Union["pd.DataFrame", np.ndarray]) -> np.ndarray:
94
96
  """
95
97
  Validates the input data to ensure it is either a Pandas DataFrame
96
98
  or Numpy array.
@@ -98,24 +100,25 @@ class LocalOutlierProbability(object):
98
100
  :return: a vector of values to be used in calculating the local
99
101
  outlier probability.
100
102
  """
101
- if obj.__class__.__name__ == 'DataFrame':
103
+ if obj.__class__.__name__ == "DataFrame":
102
104
  points_vector = obj.values
103
105
  return points_vector
104
- elif obj.__class__.__name__ == 'ndarray':
106
+ elif obj.__class__.__name__ == "ndarray":
105
107
  points_vector = obj
106
108
  return points_vector
107
109
  else:
108
110
  warnings.warn(
109
111
  "Provided data or distance matrix must be in ndarray "
110
112
  "or DataFrame.",
111
- UserWarning)
113
+ UserWarning,
114
+ )
112
115
  if isinstance(obj, list):
113
116
  points_vector = np.array(obj)
114
117
  return points_vector
115
118
  points_vector = np.array([obj])
116
119
  return points_vector
117
120
 
118
- def _inputs(self, obj: 'LocalOutlierProbability'):
121
+ def _inputs(self, obj: "LocalOutlierProbability"):
119
122
  """
120
123
  Validates the inputs provided during initialization to ensure
121
124
  that the needed objects are provided.
@@ -131,35 +134,43 @@ class LocalOutlierProbability(object):
131
134
  elif all(v is not None for v in [obj.data, obj.distance_matrix]):
132
135
  warnings.warn(
133
136
  "Only one of the following may be provided: data or a "
134
- "distance matrix (not both).", UserWarning
137
+ "distance matrix (not both).",
138
+ UserWarning,
135
139
  )
136
140
  return False
137
141
  if obj.data is not None:
138
142
  points_vector = self._data(obj.data)
139
143
  return points_vector, obj.distance_matrix, obj.neighbor_matrix
140
- if all(matrix is not None for matrix in [obj.neighbor_matrix,
141
- obj.distance_matrix]):
144
+ if all(
145
+ matrix is not None
146
+ for matrix in [obj.neighbor_matrix, obj.distance_matrix]
147
+ ):
142
148
  dist_vector = self._data(obj.distance_matrix)
143
149
  neigh_vector = self._data(obj.neighbor_matrix)
144
150
  else:
145
151
  warnings.warn(
146
152
  "A neighbor index matrix and distance matrix must both be "
147
- "provided when not using raw input data.", UserWarning
153
+ "provided when not using raw input data.",
154
+ UserWarning,
148
155
  )
149
156
  return False
150
157
  if obj.distance_matrix.shape != obj.neighbor_matrix.shape:
151
158
  warnings.warn(
152
159
  "The shape of the distance and neighbor "
153
- "index matrices must match.", UserWarning
160
+ "index matrices must match.",
161
+ UserWarning,
154
162
  )
155
163
  return False
156
- elif (obj.distance_matrix.shape[1] != obj.n_neighbors) \
157
- or (obj.neighbor_matrix.shape[1] !=
158
- obj.n_neighbors):
159
- warnings.warn("The shape of the distance or "
160
- "neighbor index matrix does not "
161
- "match the number of neighbors "
162
- "specified.", UserWarning)
164
+ elif (obj.distance_matrix.shape[1] != obj.n_neighbors) or (
165
+ obj.neighbor_matrix.shape[1] != obj.n_neighbors
166
+ ):
167
+ warnings.warn(
168
+ "The shape of the distance or "
169
+ "neighbor index matrix does not "
170
+ "match the number of neighbors "
171
+ "specified.",
172
+ UserWarning,
173
+ )
163
174
  return False
164
175
  return obj.data, dist_vector, neigh_vector
165
176
 
@@ -181,7 +192,8 @@ class LocalOutlierProbability(object):
181
192
  "cluster. Specify a number of neighbors smaller than "
182
193
  "the smallest cluster size (observations in smallest "
183
194
  "cluster minus one).",
184
- UserWarning)
195
+ UserWarning,
196
+ )
185
197
  return False
186
198
  return True
187
199
 
@@ -196,17 +208,19 @@ class LocalOutlierProbability(object):
196
208
  """
197
209
  if not obj.n_neighbors > 0:
198
210
  obj.n_neighbors = 10
199
- warnings.warn("n_neighbors must be greater than 0."
200
- " Fit with " + str(obj.n_neighbors) +
201
- " instead.",
202
- UserWarning)
211
+ warnings.warn(
212
+ "n_neighbors must be greater than 0."
213
+ " Fit with " + str(obj.n_neighbors) + " instead.",
214
+ UserWarning,
215
+ )
203
216
  return False
204
217
  elif obj.n_neighbors >= obj._n_observations():
205
218
  obj.n_neighbors = obj._n_observations() - 1
206
219
  warnings.warn(
207
220
  "n_neighbors must be less than the number of observations."
208
221
  " Fit with " + str(obj.n_neighbors) + " instead.",
209
- UserWarning)
222
+ UserWarning,
223
+ )
210
224
  return True
211
225
 
212
226
  @staticmethod
@@ -219,8 +233,8 @@ class LocalOutlierProbability(object):
219
233
  """
220
234
  if obj.extent not in [1, 2, 3]:
221
235
  warnings.warn(
222
- "extent parameter (lambda) must be 1, 2, or 3.",
223
- UserWarning)
236
+ "extent parameter (lambda) must be 1, 2, or 3.", UserWarning
237
+ )
224
238
  return False
225
239
  return True
226
240
 
@@ -234,8 +248,8 @@ class LocalOutlierProbability(object):
234
248
  """
235
249
  if np.any(np.isnan(obj.data)):
236
250
  warnings.warn(
237
- "Method does not support missing values in input data.",
238
- UserWarning)
251
+ "Method does not support missing values in input data.", UserWarning
252
+ )
239
253
  return False
240
254
  return True
241
255
 
@@ -251,7 +265,8 @@ class LocalOutlierProbability(object):
251
265
  warnings.warn(
252
266
  "Must fit on historical data by calling fit() prior to "
253
267
  "calling stream(x).",
254
- UserWarning)
268
+ UserWarning,
269
+ )
255
270
  return False
256
271
  return True
257
272
 
@@ -269,7 +284,8 @@ class LocalOutlierProbability(object):
269
284
  warnings.warn(
270
285
  "Stream approach does not support clustered data. "
271
286
  "Automatically refit using single cluster of points.",
272
- UserWarning)
287
+ UserWarning,
288
+ )
273
289
  return False
274
290
  return True
275
291
 
@@ -291,43 +307,35 @@ class LocalOutlierProbability(object):
291
307
  assert len(types) == f.__code__.co_argcount
292
308
 
293
309
  def new_f(*args, **kwds):
294
- for (a, t) in zip(args, types):
295
- if type(a).__name__ == 'DataFrame':
310
+ for a, t in zip(args, types):
311
+ if type(a).__name__ == "DataFrame":
296
312
  a = np.array(a)
297
313
  if isinstance(a, t) is False:
298
- warnings.warn("Argument %r is not of type %s" % (a, t),
299
- UserWarning)
314
+ warnings.warn(
315
+ "Argument %r is not of type %s" % (a, t), UserWarning
316
+ )
300
317
  opt_types = {
301
- 'distance_matrix': {
302
- 'type': types[2]
303
- },
304
- 'neighbor_matrix': {
305
- 'type': types[3]
306
- },
307
- 'extent': {
308
- 'type': types[4]
309
- },
310
- 'n_neighbors': {
311
- 'type': types[5]
312
- },
313
- 'cluster_labels': {
314
- 'type': types[6]
315
- },
316
- 'use_numba': {
317
- 'type': types[7]
318
- },
319
- 'progress_bar': {
320
- 'type': types[8]
321
- }
318
+ "distance_matrix": {"type": types[2]},
319
+ "neighbor_matrix": {"type": types[3]},
320
+ "extent": {"type": types[4]},
321
+ "n_neighbors": {"type": types[5]},
322
+ "cluster_labels": {"type": types[6]},
323
+ "use_numba": {"type": types[7]},
324
+ "progress_bar": {"type": types[8]},
322
325
  }
323
326
  for x in kwds:
324
- opt_types[x]['value'] = kwds[x]
327
+ opt_types[x]["value"] = kwds[x]
325
328
  for k in opt_types:
326
329
  try:
327
- if isinstance(opt_types[k]['value'],
328
- opt_types[k]['type']) is False:
329
- warnings.warn("Argument %r is not of type %s." % (
330
- k, opt_types[k]['type']), UserWarning)
330
+ if (
331
+ isinstance(opt_types[k]["value"], opt_types[k]["type"])
332
+ is False
333
+ ):
334
+ warnings.warn(
335
+ "Argument %r is not of type %s."
336
+ % (k, opt_types[k]["type"]),
337
+ UserWarning,
338
+ )
331
339
  except KeyError:
332
340
  pass
333
341
  return f(*args, **kwds)
@@ -337,11 +345,28 @@ class LocalOutlierProbability(object):
337
345
 
338
346
  return decorator
339
347
 
340
- @accepts(object, np.ndarray, np.ndarray, np.ndarray, (int, np.integer),
341
- (int, np.integer), list, bool, bool)
342
- def __init__(self, data=None, distance_matrix=None, neighbor_matrix=None,
343
- extent=3, n_neighbors=10, cluster_labels=None,
344
- use_numba=False, progress_bar=False) -> None:
348
+ @accepts(
349
+ object,
350
+ np.ndarray,
351
+ np.ndarray,
352
+ np.ndarray,
353
+ (int, np.integer),
354
+ (int, np.integer),
355
+ list,
356
+ bool,
357
+ bool,
358
+ )
359
+ def __init__(
360
+ self,
361
+ data=None,
362
+ distance_matrix=None,
363
+ neighbor_matrix=None,
364
+ extent=3,
365
+ n_neighbors=10,
366
+ cluster_labels=None,
367
+ use_numba=False,
368
+ progress_bar=False,
369
+ ) -> None:
345
370
  self.data = data
346
371
  self.distance_matrix = distance_matrix
347
372
  self.neighbor_matrix = neighbor_matrix
@@ -358,11 +383,11 @@ class LocalOutlierProbability(object):
358
383
  self.progress_bar = progress_bar
359
384
  self.is_fit = False
360
385
 
361
- if self.use_numba is True and 'numba' not in sys.modules:
386
+ if self.use_numba is True and "numba" not in sys.modules:
362
387
  self.use_numba = False
363
388
  warnings.warn(
364
- "Numba is not available, falling back to pure python mode.",
365
- UserWarning)
389
+ "Numba is not available, falling back to pure python mode.", UserWarning
390
+ )
366
391
 
367
392
  self.Validate()._inputs(self)
368
393
  self.Validate._extent(self)
@@ -372,15 +397,14 @@ class LocalOutlierProbability(object):
372
397
  """
373
398
 
374
399
  @staticmethod
375
- def _standard_distance(cardinality: float, sum_squared_distance: float) \
376
- -> float:
400
+ def _standard_distance(cardinality: float, sum_squared_distance: float) -> float:
377
401
  """
378
402
  Calculates the standard distance of an observation.
379
403
  :param cardinality: the cardinality of the input observation.
380
404
  :param sum_squared_distance: the sum squared distance between all
381
405
  neighbors of the input observation.
382
406
  :return: the standard distance.
383
- # """
407
+ #"""
384
408
  division_result = sum_squared_distance / cardinality
385
409
  st_dist = sqrt(division_result)
386
410
  return st_dist
@@ -397,8 +421,9 @@ class LocalOutlierProbability(object):
397
421
  return extent * standard_distance
398
422
 
399
423
  @staticmethod
400
- def _prob_outlier_factor(probabilistic_distance: np.ndarray, ev_prob_dist:
401
- np.ndarray) -> np.ndarray:
424
+ def _prob_outlier_factor(
425
+ probabilistic_distance: np.ndarray, ev_prob_dist: np.ndarray
426
+ ) -> np.ndarray:
402
427
  """
403
428
  Calculates the probabilistic outlier factor of an observation.
404
429
  :param probabilistic_distance: the probabilistic distance of the
@@ -409,14 +434,14 @@ class LocalOutlierProbability(object):
409
434
  if np.all(probabilistic_distance == ev_prob_dist):
410
435
  return np.zeros(probabilistic_distance.shape)
411
436
  else:
412
- ev_prob_dist[ev_prob_dist == 0.] = 1.e-8
413
- result = np.divide(probabilistic_distance, ev_prob_dist) - 1.
437
+ ev_prob_dist[ev_prob_dist == 0.0] = 1.0e-8
438
+ result = np.divide(probabilistic_distance, ev_prob_dist) - 1.0
414
439
  return result
415
440
 
416
441
  @staticmethod
417
- def _norm_prob_outlier_factor(extent: float,
418
- ev_probabilistic_outlier_factor: list) \
419
- -> list:
442
+ def _norm_prob_outlier_factor(
443
+ extent: float, ev_probabilistic_outlier_factor: list
444
+ ) -> list:
420
445
  """
421
446
  Calculates the normalized probabilistic outlier factor of an
422
447
  observation.
@@ -431,8 +456,9 @@ class LocalOutlierProbability(object):
431
456
  return npofs
432
457
 
433
458
  @staticmethod
434
- def _local_outlier_probability(plof_val: np.ndarray, nplof_val: np.ndarray) \
435
- -> np.ndarray:
459
+ def _local_outlier_probability(
460
+ plof_val: np.ndarray, nplof_val: np.ndarray
461
+ ) -> np.ndarray:
436
462
  """
437
463
  Calculates the local outlier probability of an observation.
438
464
  :param plof_val: the probabilistic outlier factor of the input
@@ -445,7 +471,7 @@ class LocalOutlierProbability(object):
445
471
  if np.all(plof_val == nplof_val):
446
472
  return np.zeros(plof_val.shape)
447
473
  else:
448
- return np.maximum(0, erf_vec(plof_val / (nplof_val * np.sqrt(2.))))
474
+ return np.maximum(0, erf_vec(plof_val / (nplof_val * np.sqrt(2.0))))
449
475
 
450
476
  def _n_observations(self) -> int:
451
477
  """
@@ -499,8 +525,9 @@ class LocalOutlierProbability(object):
499
525
  :return: the updated storage matrix that collects information on
500
526
  each observation.
501
527
  """
502
- for vec, cluster_id in zip(range(self.distance_matrix.shape[0]),
503
- self._cluster_labels()):
528
+ for vec, cluster_id in zip(
529
+ range(self.distance_matrix.shape[0]), self._cluster_labels()
530
+ ):
504
531
  data_store[vec][0] = cluster_id
505
532
  data_store[vec][1] = self.distance_matrix[vec]
506
533
  data_store[vec][2] = self.neighbor_matrix[vec]
@@ -508,10 +535,10 @@ class LocalOutlierProbability(object):
508
535
 
509
536
  @staticmethod
510
537
  def _compute_distance_and_neighbor_matrix(
511
- clust_points_vector: np.ndarray,
512
- indices: np.ndarray,
513
- distances: np.ndarray,
514
- indexes: np.ndarray
538
+ clust_points_vector: np.ndarray,
539
+ indices: np.ndarray,
540
+ distances: np.ndarray,
541
+ indexes: np.ndarray,
515
542
  ) -> Tuple[np.ndarray, np.ndarray, int]:
516
543
  """
517
544
  This helper method provides the heavy lifting for the _distances
@@ -519,27 +546,27 @@ class LocalOutlierProbability(object):
519
546
  written so that it can make full use of Numba's jit capabilities if
520
547
  desired.
521
548
  """
522
-
523
549
  for i in range(clust_points_vector.shape[0]):
524
550
  for j in range(i + 1, clust_points_vector.shape[0]):
525
- p = ((i,), (j,))
551
+ # Global index of the points
552
+ global_i = indices[0][i]
553
+ global_j = indices[0][j]
526
554
 
527
- diff = clust_points_vector[p[0]] - clust_points_vector[p[1]]
555
+ # Compute Euclidean distance
556
+ diff = clust_points_vector[i] - clust_points_vector[j]
528
557
  d = np.dot(diff, diff) ** 0.5
529
558
 
530
- idx = indices[0][p[0]]
531
- idx_max = distances[idx].argmax()
559
+ # Update distance and neighbor index for global_i
560
+ idx_max = distances[global_i].argmax()
561
+ if d < distances[global_i][idx_max]:
562
+ distances[global_i][idx_max] = d
563
+ indexes[global_i][idx_max] = global_j
532
564
 
533
- if d < distances[idx][idx_max]:
534
- distances[idx][idx_max] = d
535
- indexes[idx][idx_max] = p[1][0]
536
-
537
- idx = indices[0][p[1]]
538
- idx_max = distances[idx].argmax()
539
-
540
- if d < distances[idx][idx_max]:
541
- distances[idx][idx_max] = d
542
- indexes[idx][idx_max] = p[0][0]
565
+ # Update distance and neighbor index for global_j
566
+ idx_max = distances[global_j].argmax()
567
+ if d < distances[global_j][idx_max]:
568
+ distances[global_j][idx_max] = d
569
+ indexes[global_j][idx_max] = global_i
543
570
 
544
571
  yield distances, indexes, i
545
572
 
@@ -552,20 +579,21 @@ class LocalOutlierProbability(object):
552
579
  :return: the updated storage matrix that collects information on
553
580
  each observation.
554
581
  """
555
- distances = np.full([self._n_observations(), self.n_neighbors], 9e10,
556
- dtype=float)
557
- indexes = np.full([self._n_observations(), self.n_neighbors], 9e10,
558
- dtype=float)
582
+ distances = np.full(
583
+ [self._n_observations(), self.n_neighbors], 9e10, dtype=float
584
+ )
585
+ indexes = np.full([self._n_observations(), self.n_neighbors], 9e10, dtype=float)
559
586
  self.points_vector = self.Validate._data(self.data)
560
- compute = numba.jit(self._compute_distance_and_neighbor_matrix,
561
- cache=True) if self.use_numba else \
562
- self._compute_distance_and_neighbor_matrix
587
+ compute = (
588
+ numba.jit(self._compute_distance_and_neighbor_matrix, cache=True)
589
+ if self.use_numba
590
+ else self._compute_distance_and_neighbor_matrix
591
+ )
563
592
  progress = "="
564
593
  for cluster_id in set(self._cluster_labels()):
565
594
  indices = np.where(self._cluster_labels() == cluster_id)
566
595
  clust_points_vector = np.array(
567
- self.points_vector.take(indices, axis=0)[0],
568
- dtype=np.float64
596
+ self.points_vector.take(indices, axis=0)[0], dtype=np.float64
569
597
  )
570
598
  # a generator that yields an updated distance matrix on each loop
571
599
  for c in compute(clust_points_vector, indices, distances, indexes):
@@ -573,7 +601,8 @@ class LocalOutlierProbability(object):
573
601
  # update the progress bar
574
602
  if progress_bar is True:
575
603
  progress = Utils.emit_progress_bar(
576
- progress, i+1, clust_points_vector.shape[0])
604
+ progress, i + 1, clust_points_vector.shape[0]
605
+ )
577
606
 
578
607
  self.distance_matrix = distances
579
608
  self.neighbor_matrix = indexes
@@ -627,11 +656,10 @@ class LocalOutlierProbability(object):
627
656
  """
628
657
  prob_distances = []
629
658
  for i in range(data_store[:, 4].shape[0]):
630
- prob_distances.append(
631
- self._prob_distance(self.extent, data_store[:, 4][i]))
659
+ prob_distances.append(self._prob_distance(self.extent, data_store[:, 4][i]))
632
660
  return np.hstack((data_store, np.array([prob_distances]).T))
633
661
 
634
- def _prob_distances_ev(self, data_store: np.ndarray) -> np.ndarray:
662
+ def _prob_distances_ev(self, data_store) -> np.ndarray:
635
663
  """
636
664
  Calculates the expected value of the probabilistic distance for
637
665
  each observation in the input data with respect to the cluster the
@@ -645,19 +673,20 @@ class LocalOutlierProbability(object):
645
673
  for cluster_id in self.cluster_labels_u:
646
674
  indices = np.where(data_store[:, 0] == cluster_id)[0]
647
675
  for index in indices:
648
- nbrhood = data_store[index][2].astype(int)
649
- nbrhood_prob_distances = np.take(data_store[:, 5],
650
- nbrhood).astype(float)
676
+ # Global neighbor indices for the current point
677
+ nbrhood = data_store[index][2].astype(int) # Ensure global indices
678
+ nbrhood_prob_distances = np.take(data_store[:, 5], nbrhood).astype(
679
+ float
680
+ )
651
681
  nbrhood_prob_distances_nonan = nbrhood_prob_distances[
652
- np.logical_not(np.isnan(nbrhood_prob_distances))]
653
- prob_set_distance_ev[index] = \
654
- nbrhood_prob_distances_nonan.mean()
682
+ np.logical_not(np.isnan(nbrhood_prob_distances))
683
+ ]
684
+ prob_set_distance_ev[index] = nbrhood_prob_distances_nonan.mean()
685
+
655
686
  self.prob_distances_ev = prob_set_distance_ev
656
- data_store = np.hstack((data_store, prob_set_distance_ev))
657
- return data_store
687
+ return np.hstack((data_store, prob_set_distance_ev))
658
688
 
659
- def _prob_local_outlier_factors(self,
660
- data_store: np.ndarray) -> np.ndarray:
689
+ def _prob_local_outlier_factors(self, data_store: np.ndarray) -> np.ndarray:
661
690
  """
662
691
  Calculates the probabilistic local outlier factor for each
663
692
  observation in the input data.
@@ -667,13 +696,22 @@ class LocalOutlierProbability(object):
667
696
  each observation.
668
697
  """
669
698
  return np.hstack(
670
- (data_store,
671
- np.array([np.apply_along_axis(self._prob_outlier_factor, 0,
672
- data_store[:, 5],
673
- data_store[:, 6])]).T))
699
+ (
700
+ data_store,
701
+ np.array(
702
+ [
703
+ np.apply_along_axis(
704
+ self._prob_outlier_factor,
705
+ 0,
706
+ data_store[:, 5],
707
+ data_store[:, 6],
708
+ )
709
+ ]
710
+ ).T,
711
+ )
712
+ )
674
713
 
675
- def _prob_local_outlier_factors_ev(self,
676
- data_store: np.ndarray) -> np.ndarray:
714
+ def _prob_local_outlier_factors_ev(self, data_store: np.ndarray) -> np.ndarray:
677
715
  """
678
716
  Calculates the expected value of the probabilistic local outlier factor
679
717
  for each observation in the input data with respect to the cluster the
@@ -686,21 +724,31 @@ class LocalOutlierProbability(object):
686
724
  prob_local_outlier_factor_ev_dict = {}
687
725
  for cluster_id in self.cluster_labels_u:
688
726
  indices = np.where(data_store[:, 0] == cluster_id)
689
- prob_local_outlier_factors = np.take(data_store[:, 7],
690
- indices).astype(float)
691
- prob_local_outlier_factors_nonan = prob_local_outlier_factors[
692
- np.logical_not(np.isnan(prob_local_outlier_factors))]
693
- prob_local_outlier_factor_ev_dict[cluster_id] = (
694
- np.power(prob_local_outlier_factors_nonan, 2).sum() /
695
- float(prob_local_outlier_factors_nonan.size)
727
+ prob_local_outlier_factors = np.take(data_store[:, 7], indices).astype(
728
+ float
696
729
  )
730
+ prob_local_outlier_factors_nonan = prob_local_outlier_factors[
731
+ np.logical_not(np.isnan(prob_local_outlier_factors))
732
+ ]
733
+ prob_local_outlier_factor_ev_dict[cluster_id] = np.power(
734
+ prob_local_outlier_factors_nonan, 2
735
+ ).sum() / float(prob_local_outlier_factors_nonan.size)
697
736
  data_store = np.hstack(
698
- (data_store, np.array([[prob_local_outlier_factor_ev_dict[x] for x
699
- in data_store[:, 0].tolist()]]).T))
737
+ (
738
+ data_store,
739
+ np.array(
740
+ [
741
+ [
742
+ prob_local_outlier_factor_ev_dict[x]
743
+ for x in data_store[:, 0].tolist()
744
+ ]
745
+ ]
746
+ ).T,
747
+ )
748
+ )
700
749
  return data_store
701
750
 
702
- def _norm_prob_local_outlier_factors(self, data_store: np.ndarray) \
703
- -> np.ndarray:
751
+ def _norm_prob_local_outlier_factors(self, data_store: np.ndarray) -> np.ndarray:
704
752
  """
705
753
  Calculates the normalized probabilistic local outlier factor for each
706
754
  observation in the input data.
@@ -709,11 +757,20 @@ class LocalOutlierProbability(object):
709
757
  :return: the updated storage matrix that collects information on
710
758
  each observation.
711
759
  """
712
- return np.hstack((data_store, np.array([self._norm_prob_outlier_factor(
713
- self.extent, data_store[:, 8].tolist())]).T))
760
+ return np.hstack(
761
+ (
762
+ data_store,
763
+ np.array(
764
+ [
765
+ self._norm_prob_outlier_factor(
766
+ self.extent, data_store[:, 8].tolist()
767
+ )
768
+ ]
769
+ ).T,
770
+ )
771
+ )
714
772
 
715
- def _local_outlier_probabilities(self,
716
- data_store: np.ndarray) -> np.ndarray:
773
+ def _local_outlier_probabilities(self, data_store: np.ndarray) -> np.ndarray:
717
774
  """
718
775
  Calculates the local outlier probability for each observation in the
719
776
  input data.
@@ -723,17 +780,26 @@ class LocalOutlierProbability(object):
723
780
  each observation.
724
781
  """
725
782
  return np.hstack(
726
- (data_store,
727
- np.array([np.apply_along_axis(self._local_outlier_probability, 0,
728
- data_store[:, 7],
729
- data_store[:, 9])]).T))
783
+ (
784
+ data_store,
785
+ np.array(
786
+ [
787
+ np.apply_along_axis(
788
+ self._local_outlier_probability,
789
+ 0,
790
+ data_store[:, 7],
791
+ data_store[:, 9],
792
+ )
793
+ ]
794
+ ).T,
795
+ )
796
+ )
730
797
 
731
798
  """
732
799
  Public methods
733
800
  """
734
801
 
735
- def fit(self) -> 'LocalOutlierProbability':
736
-
802
+ def fit(self) -> "LocalOutlierProbability":
737
803
  """
738
804
  Calculates the local outlier probability for each observation in the
739
805
  input data according to the input parameters extent, n_neighbors, and
@@ -745,8 +811,7 @@ class LocalOutlierProbability(object):
745
811
  self.Validate._n_neighbors(self)
746
812
  if self.Validate._cluster_size(self) is False:
747
813
  sys.exit()
748
- if self.data is not None and self.Validate._missing_values(
749
- self) is False:
814
+ if self.data is not None and self.Validate._missing_values(self) is False:
750
815
  sys.exit()
751
816
 
752
817
  store = self._store()
@@ -770,7 +835,6 @@ class LocalOutlierProbability(object):
770
835
  return self
771
836
 
772
837
  def stream(self, x: np.ndarray) -> np.ndarray:
773
-
774
838
  """
775
839
  Calculates the local outlier probability for an individual sample
776
840
  according to the input parameters extent, n_neighbors, and
@@ -809,12 +873,12 @@ class LocalOutlierProbability(object):
809
873
  ssd = np.power(distances, 2).sum()
810
874
  std_dist = np.sqrt(np.divide(ssd, self.n_neighbors))
811
875
  prob_dist = self._prob_distance(self.extent, std_dist)
812
- plof = self._prob_outlier_factor(np.array(prob_dist),
813
- np.array(
814
- self.prob_distances_ev.mean())
815
- )
876
+ plof = self._prob_outlier_factor(
877
+ np.array(prob_dist), np.array(self.prob_distances_ev.mean())
878
+ )
816
879
  loop = self._local_outlier_probability(
817
- plof, self.norm_prob_local_outlier_factor)
880
+ plof, self.norm_prob_local_outlier_factor
881
+ )
818
882
 
819
883
  if orig_cluster_labels is not None:
820
884
  self.cluster_labels = orig_cluster_labels
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: PyNomaly
3
- Version: 0.3.2
3
+ Version: 0.3.4
4
4
  Summary: A Python 3 implementation of LoOP: Local Outlier Probabilities, a local density based outlier detection method providing an outlier score in the range of [0,1].
5
5
  Home-page: https://github.com/vc1492a/PyNomaly
6
6
  Author: Valentino Constantinou
7
7
  Author-email: vc@valentino.io
8
8
  License: Apache License, Version 2.0
9
- Download-URL: https://github.com/vc1492a/PyNomaly/archive/0.3.2.tar.gz
9
+ Download-URL: https://github.com/vc1492a/PyNomaly/archive/0.3.4.tar.gz
10
10
  Keywords: outlier,anomaly,detection,machine,learning,probability
11
11
  Platform: UNKNOWN
12
12
  Requires-Dist: numpy
@@ -0,0 +1,7 @@
1
+ PyNomaly/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ PyNomaly/loop.py,sha256=VLllAa5pOIHZjlI0XuLSpjLzY3tJ_ZTzDCbbIh3VM44,34571
3
+ PyNomaly-0.3.4.dist-info/LICENSE.txt,sha256=xZYfuJFfM57xOlBLbkJmsCwEvw1P6K2t3jI8faTdOMs,563
4
+ PyNomaly-0.3.4.dist-info/METADATA,sha256=xkHaSUSpOnZynE_KfVQAwoBXNOzTpE-IymwuiRdIeos,581
5
+ PyNomaly-0.3.4.dist-info/WHEEL,sha256=g4nMs7d-Xl9-xC9XovUrsDHGXt-FT0E17Yqo92DEfvY,92
6
+ PyNomaly-0.3.4.dist-info/top_level.txt,sha256=el-HX4RLyBjkh2CW3TK9yXAA54zQOIYVmcJjRbBYKX4,9
7
+ PyNomaly-0.3.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.33.4)
2
+ Generator: bdist_wheel (0.34.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,7 +0,0 @@
1
- PyNomaly/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- PyNomaly/loop.py,sha256=TN_uRdunxW-9T_uqwBEMe3hFQ1KBPEBaaILdx60j7dY,33782
3
- PyNomaly-0.3.2.dist-info/LICENSE.txt,sha256=xZYfuJFfM57xOlBLbkJmsCwEvw1P6K2t3jI8faTdOMs,563
4
- PyNomaly-0.3.2.dist-info/METADATA,sha256=aZyIcqIYNKlRvPfGer9jppgw5lA3n9kA-q6OWXcuYYg,581
5
- PyNomaly-0.3.2.dist-info/WHEEL,sha256=S8S5VL-stOTSZDYxHyf0KP7eds0J72qrK0Evu3TfyAY,92
6
- PyNomaly-0.3.2.dist-info/top_level.txt,sha256=el-HX4RLyBjkh2CW3TK9yXAA54zQOIYVmcJjRbBYKX4,9
7
- PyNomaly-0.3.2.dist-info/RECORD,,