aisp 0.1.35__py3-none-any.whl → 0.1.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aisp/__init__.py +1 -1
- aisp/base/__init__.py +4 -0
- aisp/base/_classifier.py +110 -0
- aisp/nsa/__init__.py +1 -1
- aisp/nsa/_base.py +26 -120
- aisp/nsa/_negative_selection.py +118 -213
- aisp/nsa/_ns_core.py +153 -0
- aisp/utils/_multiclass.py +1 -0
- aisp/utils/distance.py +215 -0
- aisp/utils/metrics.py +2 -2
- aisp/utils/sanitizers.py +3 -2
- {aisp-0.1.35.dist-info → aisp-0.1.41.dist-info}/METADATA +10 -6
- aisp-0.1.41.dist-info/RECORD +18 -0
- {aisp-0.1.35.dist-info → aisp-0.1.41.dist-info}/WHEEL +1 -1
- aisp-0.1.35.dist-info/RECORD +0 -14
- {aisp-0.1.35.dist-info → aisp-0.1.41.dist-info}/licenses/LICENSE +0 -0
- {aisp-0.1.35.dist-info → aisp-0.1.41.dist-info}/top_level.txt +0 -0
aisp/nsa/_negative_selection.py
CHANGED
@@ -1,22 +1,29 @@
|
|
1
1
|
"""Negative Selection Algorithm."""
|
2
2
|
|
3
|
-
from collections import namedtuple
|
4
3
|
from typing import Dict, Literal, Optional, Union
|
5
|
-
from scipy.spatial.distance import cdist
|
6
4
|
from tqdm import tqdm
|
7
5
|
|
8
6
|
import numpy as np
|
9
7
|
import numpy.typing as npt
|
10
8
|
|
9
|
+
from ._ns_core import (
|
10
|
+
check_detector_bnsa_validity,
|
11
|
+
bnsa_class_prediction,
|
12
|
+
check_detector_rnsa_validity,
|
13
|
+
)
|
11
14
|
from ..exceptions import MaxDiscardsReachedError
|
12
|
-
from ..utils import
|
15
|
+
from ..utils.distance import (
|
16
|
+
min_distance_to_class_vectors,
|
17
|
+
get_metric_code,
|
18
|
+
compute_metric_distance,
|
19
|
+
)
|
13
20
|
from ..utils.sanitizers import sanitize_seed, sanitize_choice, sanitize_param
|
14
|
-
from ._base import
|
21
|
+
from ._base import BaseNSA, Detector
|
15
22
|
|
16
23
|
|
17
|
-
class RNSA(
|
24
|
+
class RNSA(BaseNSA):
|
18
25
|
"""
|
19
|
-
The ``RNSA`` (Real-Valued Negative Selection Algorithm) class is for classification and
|
26
|
+
The ``RNSA`` (Real-Valued Negative Selection Algorithm) class is for classification and
|
20
27
|
identification purposes. of anomalies through the self and not self method.
|
21
28
|
|
22
29
|
Parameters
|
@@ -64,7 +71,7 @@ class RNSA(Base):
|
|
64
71
|
Notes
|
65
72
|
----------
|
66
73
|
[1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.minkowski_distance.html
|
67
|
-
|
74
|
+
|
68
75
|
[2] https://doi.org/10.1007/978-3-540-24854-5_30
|
69
76
|
|
70
77
|
"""
|
@@ -79,15 +86,9 @@ class RNSA(Base):
|
|
79
86
|
max_discards: int = 1000,
|
80
87
|
seed: int = None,
|
81
88
|
algorithm: Literal["default-NSA", "V-detector"] = "default-NSA",
|
82
|
-
**kwargs: Dict[str, Union[bool, str, float]]
|
89
|
+
**kwargs: Dict[str, Union[bool, str, float]],
|
83
90
|
):
|
84
|
-
|
85
|
-
|
86
|
-
self.metric = sanitize_choice(
|
87
|
-
metric,
|
88
|
-
["manhattan", "minkowski"],
|
89
|
-
"euclidean"
|
90
|
-
)
|
91
|
+
self.metric = sanitize_choice(metric, ["manhattan", "minkowski"], "euclidean")
|
91
92
|
self.seed = sanitize_seed(seed)
|
92
93
|
if self.seed is not None:
|
93
94
|
np.random.seed(seed)
|
@@ -95,28 +96,28 @@ class RNSA(Base):
|
|
95
96
|
self.N: int = sanitize_param(N, 100, lambda x: x >= 1)
|
96
97
|
self.r: float = sanitize_param(r, 0.05, lambda x: x > 0)
|
97
98
|
self.r_s: float = sanitize_param(r_s, 0.0001, lambda x: x > 0)
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
self._algorithm: str = algorithm
|
102
|
-
else:
|
103
|
-
self._detector = namedtuple("Detector", "position")
|
104
|
-
self._algorithm: str = "default-NSA"
|
105
|
-
|
99
|
+
self.algorithm: str = sanitize_param(
|
100
|
+
algorithm, "default-NSA", lambda x: x == "V-detector"
|
101
|
+
)
|
106
102
|
self.max_discards: int = sanitize_param(max_discards, 1000, lambda x: x > 0)
|
107
103
|
|
108
104
|
# Retrieves the variables from kwargs.
|
109
105
|
self.p: float = kwargs.get("p", 2)
|
110
|
-
self.
|
106
|
+
self.cell_bounds: bool = kwargs.get("cell_bounds", False)
|
111
107
|
self.non_self_label: str = kwargs.get("non_self_label", "non-self")
|
112
108
|
|
113
109
|
# Initializes the other class variables as None.
|
114
|
-
self.
|
110
|
+
self._detectors: Union[dict, None] = None
|
115
111
|
self.classes: npt.NDArray = None
|
116
112
|
|
113
|
+
@property
|
114
|
+
def detectors(self) -> Dict[str, list[Detector]]:
|
115
|
+
"""Returns the trained detectors, organized by class."""
|
116
|
+
return self._detectors
|
117
|
+
|
117
118
|
def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True):
|
118
119
|
"""
|
119
|
-
The function ``fit(...)``, performs the training according to ``X`` and ``y``, using the
|
120
|
+
The function ``fit(...)``, performs the training according to ``X`` and ``y``, using the
|
120
121
|
method negative selection method(``NegativeSelect``).
|
121
122
|
|
122
123
|
Parameters
|
@@ -125,7 +126,13 @@ class RNSA(Base):
|
|
125
126
|
characteristics, [``N samples`` (rows)][``N features`` (columns)].
|
126
127
|
* y (``npt.NDArray``): Array of target classes of ``X`` with [``N samples`` (lines)].
|
127
128
|
verbose (``bool``): Feedback from detector generation to the user.
|
128
|
-
|
129
|
+
|
130
|
+
Raises
|
131
|
+
----------
|
132
|
+
* TypeError: If X or y are not ndarrays or have incompatible shapes.
|
133
|
+
* MaxDiscardsReachedError: The maximum number of detector discards was reached during
|
134
|
+
maturation. Check the defined radius value and consider reducing it.
|
135
|
+
|
129
136
|
Returns
|
130
137
|
----------
|
131
138
|
* (``self``): Returns the instance itself.
|
@@ -138,7 +145,7 @@ class RNSA(Base):
|
|
138
145
|
# Dictionary that will store detectors with classes as keys.
|
139
146
|
list_detectors_by_class = {}
|
140
147
|
# Separates the classes for training.
|
141
|
-
sample_index = self.
|
148
|
+
sample_index = self._slice_index_list_by_class(y)
|
142
149
|
# Progress bar for generating all detectors.
|
143
150
|
if verbose:
|
144
151
|
progress = tqdm(
|
@@ -150,6 +157,7 @@ class RNSA(Base):
|
|
150
157
|
# Initializes the empty set that will contain the valid detectors.
|
151
158
|
valid_detectors_set = []
|
152
159
|
discard_count = 0
|
160
|
+
x_class = X[sample_index[_class_]]
|
153
161
|
# Indicating which class the algorithm is currently processing for the progress bar.
|
154
162
|
if verbose:
|
155
163
|
progress.set_description_str(
|
@@ -159,21 +167,15 @@ class RNSA(Base):
|
|
159
167
|
# Generates a candidate detector vector randomly with values between 0 and 1.
|
160
168
|
vector_x = np.random.random_sample(size=X.shape[1])
|
161
169
|
# Checks the validity of the detector for non-self with respect to the class samples
|
162
|
-
valid_detector = self.__checks_valid_detector(
|
163
|
-
X=X, vector_x=vector_x, samples_index_class=sample_index[_class_]
|
164
|
-
)
|
170
|
+
valid_detector = self.__checks_valid_detector(x_class, vector_x)
|
165
171
|
|
166
172
|
# If the detector is valid, add it to the list of valid detectors.
|
167
|
-
if
|
173
|
+
if valid_detector is not False:
|
168
174
|
discard_count = 0
|
169
|
-
|
170
|
-
|
175
|
+
radius = (
|
176
|
+
valid_detector[1] if self.algorithm == "V-detector" else None
|
171
177
|
)
|
172
|
-
|
173
|
-
progress.update(1)
|
174
|
-
elif valid_detector:
|
175
|
-
discard_count = 0
|
176
|
-
valid_detectors_set.append(self._detector(vector_x))
|
178
|
+
valid_detectors_set.append(Detector(vector_x, radius))
|
177
179
|
if verbose:
|
178
180
|
progress.update(1)
|
179
181
|
else:
|
@@ -190,7 +192,7 @@ class RNSA(Base):
|
|
190
192
|
f"successfully generated\033[0m"
|
191
193
|
)
|
192
194
|
# Saves the found detectors in the attribute for the non-self detectors of the trained model
|
193
|
-
self.
|
195
|
+
self._detectors = list_detectors_by_class
|
194
196
|
return self
|
195
197
|
|
196
198
|
def predict(self, X: npt.NDArray) -> Optional[npt.NDArray]:
|
@@ -204,6 +206,13 @@ class RNSA(Base):
|
|
204
206
|
Array with input samples with [``N samples`` (Lines)] and
|
205
207
|
[``N characteristics``(Columns)]
|
206
208
|
|
209
|
+
Raises
|
210
|
+
----------
|
211
|
+
* TypeError
|
212
|
+
If X is not an ndarray or list.
|
213
|
+
* FeatureDimensionMismatch
|
214
|
+
If the number of features in X does not match the expected number.
|
215
|
+
|
207
216
|
Returns
|
208
217
|
----------
|
209
218
|
* C (``npt.NDArray``)
|
@@ -213,15 +222,15 @@ class RNSA(Base):
|
|
213
222
|
If there are no detectors for the prediction.
|
214
223
|
"""
|
215
224
|
# If there are no detectors, Returns None.
|
216
|
-
if self.
|
225
|
+
if self._detectors is None:
|
217
226
|
return None
|
218
227
|
|
219
228
|
super()._check_and_raise_exceptions_predict(
|
220
|
-
X, len(self.
|
229
|
+
X, len(self._detectors[self.classes[0]][0].position)
|
221
230
|
)
|
222
231
|
|
223
232
|
# Initializes an empty array that will store the predictions.
|
224
|
-
c =
|
233
|
+
c = []
|
225
234
|
# For each sample row in X.
|
226
235
|
for line in X:
|
227
236
|
class_found: bool
|
@@ -229,95 +238,67 @@ class RNSA(Base):
|
|
229
238
|
if _class_ is None:
|
230
239
|
class_found = False
|
231
240
|
else:
|
232
|
-
c
|
241
|
+
c.append(_class_)
|
233
242
|
class_found = True
|
234
243
|
|
235
244
|
# If there is only one class and the sample is not classified,
|
236
245
|
# set the output as non-self.
|
237
246
|
if not class_found and len(self.classes) == 1:
|
238
|
-
c
|
247
|
+
c.append(self.non_self_label)
|
239
248
|
# If the class is not identified with the detectors, assign the class with
|
240
249
|
# the greatest distance from the mean of its detectors.
|
241
250
|
elif not class_found:
|
242
251
|
average_distance: dict = {}
|
243
252
|
for _class_ in self.classes:
|
244
253
|
detectores = list(
|
245
|
-
map(lambda x: x.position, self.
|
254
|
+
map(lambda x: x.position, self._detectors[_class_])
|
246
255
|
)
|
247
256
|
average_distance[_class_] = np.average(
|
248
257
|
[self.__distance(detector, line) for detector in detectores]
|
249
258
|
)
|
250
|
-
c
|
251
|
-
return c
|
252
|
-
|
253
|
-
def __slice_index_list_by_class(self, y: npt.NDArray) -> dict:
|
254
|
-
"""
|
255
|
-
The function ``__slice_index_list_by_class(...)``, separates the indices of the lines
|
256
|
-
according to the output class, to loop through the sample array, only in positions where
|
257
|
-
the output is the class being trained.
|
258
|
-
|
259
|
-
Parameters
|
260
|
-
----------
|
261
|
-
* y (npt.NDArray)
|
262
|
-
Receives a ``y``[``N sample``] array with the output classes of the \
|
263
|
-
``X`` sample array.
|
264
|
-
|
265
|
-
Returns
|
266
|
-
----------
|
267
|
-
* dict: A dictionary with the list of array positions(``y``), with the classes as key.
|
268
|
-
"""
|
269
|
-
return slice_index_list_by_class(self.classes, y)
|
259
|
+
c.append(max(average_distance, key=average_distance.get))
|
260
|
+
return np.array(c)
|
270
261
|
|
271
262
|
def __checks_valid_detector(
|
272
|
-
self,
|
273
|
-
X: npt.NDArray = None,
|
274
|
-
vector_x: npt.NDArray = None,
|
275
|
-
samples_index_class: npt.NDArray = None
|
263
|
+
self, x_class: npt.NDArray = None, vector_x: npt.NDArray = None
|
276
264
|
) -> Union[bool, tuple[bool, float]]:
|
277
265
|
"""
|
278
266
|
Function to check if the detector has a valid non-proper ``r`` radius for the class.
|
279
267
|
|
280
268
|
Parameters
|
281
269
|
----------
|
282
|
-
*
|
283
|
-
Array ``
|
270
|
+
* x_class (``npt.NDArray``)
|
271
|
+
Array ``x_class`` with the samples per class.
|
284
272
|
* vector_x (``npt.NDArray``)
|
285
273
|
Randomly generated vector x candidate detector with values between[0, 1].
|
286
|
-
* samples_index_class (``npt.NDArray``)
|
287
|
-
Sample positions of a class in ``X``.
|
288
274
|
|
289
275
|
Returns
|
290
276
|
----------
|
291
277
|
* Validity (``bool``): Returns whether the detector is valid or not.
|
292
278
|
"""
|
293
279
|
# If any of the input arrays have zero size, Returns false.
|
294
|
-
if (
|
295
|
-
np.size(samples_index_class) == 0
|
296
|
-
or np.size(X) == 0
|
297
|
-
or np.size(vector_x) == 0
|
298
|
-
):
|
280
|
+
if np.size(x_class) == 0 or np.size(vector_x) == 0:
|
299
281
|
return False
|
300
282
|
# If self.k > 1, uses the k nearest neighbors (kNN); otherwise, checks the detector
|
301
283
|
# without considering kNN.
|
302
284
|
if self.k > 1:
|
303
285
|
knn_list = np.empty(shape=0)
|
304
|
-
for
|
286
|
+
for x in x_class:
|
305
287
|
# Calculates the distance between the two vectors and adds it to the kNN list if
|
306
288
|
# the distance is smaller than the largest distance in the list.
|
307
289
|
knn_list = self.__compare_knearest_neighbors_list(
|
308
|
-
knn_list, self.__distance(
|
290
|
+
knn_list, self.__distance(x, vector_x)
|
309
291
|
)
|
310
292
|
# If the average of the distances in the kNN list is less than the radius, Returns true.
|
311
293
|
distance_mean = np.mean(knn_list)
|
312
|
-
if self.
|
294
|
+
if self.algorithm == "V-detector":
|
313
295
|
return self.__detector_is_valid_to_vdetector(distance_mean, vector_x)
|
314
296
|
if distance_mean > (self.r + self.r_s):
|
315
297
|
return True
|
316
298
|
else:
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
self.__distance(X[i], vector_x) for i in samples_index_class
|
299
|
+
if self.algorithm == "V-detector":
|
300
|
+
distance = min_distance_to_class_vectors(
|
301
|
+
x_class, vector_x, get_metric_code(self.metric), self.p
|
321
302
|
)
|
322
303
|
return self.__detector_is_valid_to_vdetector(distance, vector_x)
|
323
304
|
|
@@ -325,15 +306,15 @@ class RNSA(Base):
|
|
325
306
|
# the radius plus the sample's radius, sets the validity of the detector to
|
326
307
|
# true.
|
327
308
|
threshold: float = self.r + self.r_s
|
328
|
-
if
|
329
|
-
|
309
|
+
if check_detector_rnsa_validity(
|
310
|
+
x_class, vector_x, threshold, get_metric_code(self.metric), self.p
|
311
|
+
):
|
312
|
+
return True # Detector is valid!
|
330
313
|
|
331
314
|
return False # Detector is not valid!
|
332
315
|
|
333
316
|
def __compare_knearest_neighbors_list(
|
334
|
-
self,
|
335
|
-
knn: npt.NDArray,
|
336
|
-
distance: float
|
317
|
+
self, knn: npt.NDArray, distance: float
|
337
318
|
) -> npt.NDArray:
|
338
319
|
"""
|
339
320
|
Compares the k-nearest neighbor distance at position ``k-1`` in the list ``knn``,
|
@@ -384,10 +365,10 @@ class RNSA(Base):
|
|
384
365
|
# Variable to indicate if the class was found with the detectors.
|
385
366
|
class_found: bool = True
|
386
367
|
sum_distance = 0
|
387
|
-
for detector in self.
|
368
|
+
for detector in self._detectors[_class_]:
|
388
369
|
distance = self.__distance(detector.position, line)
|
389
370
|
sum_distance += distance
|
390
|
-
if self.
|
371
|
+
if self.algorithm == "V-detector":
|
391
372
|
if distance <= detector.radius:
|
392
373
|
class_found = False
|
393
374
|
break
|
@@ -421,12 +402,10 @@ class RNSA(Base):
|
|
421
402
|
----------
|
422
403
|
* Distance (``float``): between the two points.
|
423
404
|
"""
|
424
|
-
return
|
405
|
+
return compute_metric_distance(u, v, get_metric_code(self.metric), self.p)
|
425
406
|
|
426
407
|
def __detector_is_valid_to_vdetector(
|
427
|
-
self,
|
428
|
-
distance: float,
|
429
|
-
vector_x: npt.NDArray
|
408
|
+
self, distance: float, vector_x: npt.NDArray
|
430
409
|
) -> Union[bool, tuple[bool, float]]:
|
431
410
|
"""
|
432
411
|
Check if the distance between the detector and the samples, minus the radius of the samples,
|
@@ -449,33 +428,17 @@ class RNSA(Base):
|
|
449
428
|
return False
|
450
429
|
|
451
430
|
# If _cell_bounds is True, considers the detector to be within the plane bounds.
|
452
|
-
if self.
|
431
|
+
if self.cell_bounds:
|
453
432
|
for p in vector_x:
|
454
433
|
if (p - new_detector_r) < 0 or (p + new_detector_r) > 1:
|
455
434
|
return False
|
456
435
|
|
457
436
|
return (True, new_detector_r)
|
458
437
|
|
459
|
-
|
460
|
-
|
461
|
-
The get_params function Returns a dictionary with the object's main parameters.
|
462
|
-
"""
|
463
|
-
return {
|
464
|
-
"N": self.N,
|
465
|
-
"r": self.r,
|
466
|
-
"k": self.k,
|
467
|
-
"metric": self.metric,
|
468
|
-
"seed": self.seed,
|
469
|
-
"algorithm": self._algorithm,
|
470
|
-
"r_s": self.r_s,
|
471
|
-
"cell_bounds": self._cell_bounds,
|
472
|
-
"p": self.p,
|
473
|
-
}
|
474
|
-
|
475
|
-
|
476
|
-
class BNSA(Base):
|
438
|
+
|
439
|
+
class BNSA(BaseNSA):
|
477
440
|
"""
|
478
|
-
The ``BNSA`` (Binary Negative Selection Algorithm) class is for classification and
|
441
|
+
The ``BNSA`` (Binary Negative Selection Algorithm) class is for classification and
|
479
442
|
identification purposes of anomalies through the self and not self method.
|
480
443
|
|
481
444
|
Parameters
|
@@ -505,7 +468,7 @@ class BNSA(Base):
|
|
505
468
|
seed: int = None,
|
506
469
|
no_label_sample_selection: Literal[
|
507
470
|
"max_average_difference", "max_nearest_difference"
|
508
|
-
] = "max_average_difference"
|
471
|
+
] = "max_average_difference",
|
509
472
|
):
|
510
473
|
super().__init__()
|
511
474
|
|
@@ -521,15 +484,21 @@ class BNSA(Base):
|
|
521
484
|
self.no_label_sample_selection: float = sanitize_param(
|
522
485
|
no_label_sample_selection,
|
523
486
|
"max_average_difference",
|
524
|
-
lambda x: x == "nearest_difference"
|
487
|
+
lambda x: x == "nearest_difference",
|
525
488
|
)
|
526
489
|
|
527
490
|
self.classes: npt.NDArray = None
|
528
|
-
self.
|
491
|
+
self._detectors: Optional[dict] = None
|
492
|
+
self._detectors_stack: npt.NDArray = None
|
493
|
+
|
494
|
+
@property
|
495
|
+
def detectors(self) -> Dict[str, npt.NDArray[np.bool_]]:
|
496
|
+
"""Returns the trained detectors, organized by class."""
|
497
|
+
return self._detectors
|
529
498
|
|
530
499
|
def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True):
|
531
500
|
"""
|
532
|
-
The function ``fit(...)``, performs the training according to ``X`` and ``y``, using the
|
501
|
+
The function ``fit(...)``, performs the training according to ``X`` and ``y``, using the
|
533
502
|
method negative selection method(``NegativeSelect``).
|
534
503
|
|
535
504
|
Parameters
|
@@ -540,7 +509,7 @@ class BNSA(Base):
|
|
540
509
|
* y (``npt.NDArray``):
|
541
510
|
Array of target classes of ``X`` with [``N samples`` (lines)].
|
542
511
|
verbose (``bool``): Feedback from detector generation to the user.
|
543
|
-
|
512
|
+
|
544
513
|
Returns
|
545
514
|
----------
|
546
515
|
* (``self``): Returns the instance itself.
|
@@ -548,15 +517,14 @@ class BNSA(Base):
|
|
548
517
|
super()._check_and_raise_exceptions_fit(X, y, "BNSA")
|
549
518
|
|
550
519
|
# Converts the entire array X to boolean
|
551
|
-
|
552
|
-
X = X.astype(bool)
|
520
|
+
X = X.astype(np.bool_)
|
553
521
|
|
554
522
|
# Identifying the possible classes within the output array `y`.
|
555
523
|
self.classes = np.unique(y)
|
556
524
|
# Dictionary that will store detectors with classes as keys.
|
557
525
|
list_detectors_by_class = {}
|
558
526
|
# Separates the classes for training.
|
559
|
-
sample_index: dict = self.
|
527
|
+
sample_index: dict = self._slice_index_list_by_class(y)
|
560
528
|
# Progress bar for generating all detectors.
|
561
529
|
if verbose:
|
562
530
|
progress = tqdm(
|
@@ -574,21 +542,12 @@ class BNSA(Base):
|
|
574
542
|
progress.set_description_str(
|
575
543
|
f"Generating the detectors for the {_class_} class:"
|
576
544
|
)
|
545
|
+
x_class = X[sample_index[_class_]]
|
577
546
|
while len(valid_detectors_set) < self.N:
|
578
|
-
is_valid_detector: bool = True
|
579
547
|
# Generates a candidate detector vector randomly with values 0 and 1.
|
580
|
-
vector_x = np.random.
|
581
|
-
# Calculates the distance between the candidate and the class samples.
|
582
|
-
distances = cdist(
|
583
|
-
np.expand_dims(vector_x, axis=0),
|
584
|
-
X[sample_index[_class_]],
|
585
|
-
metric="hamming",
|
586
|
-
)
|
587
|
-
# Checks if any of the distances is below or equal to the threshold.
|
588
|
-
is_valid_detector = not np.any(distances <= self.aff_thresh)
|
589
|
-
|
548
|
+
vector_x = np.random.randint(0, 2, size=X.shape[1]).astype(np.bool_)
|
590
549
|
# If the detector is valid, add it to the list of valid detectors.
|
591
|
-
if
|
550
|
+
if check_detector_bnsa_validity(x_class, vector_x, self.aff_thresh):
|
592
551
|
discard_count = 0
|
593
552
|
valid_detectors_set.append(vector_x)
|
594
553
|
if verbose:
|
@@ -599,7 +558,7 @@ class BNSA(Base):
|
|
599
558
|
raise MaxDiscardsReachedError(_class_)
|
600
559
|
|
601
560
|
# Add detectors to the dictionary with classes as keys.
|
602
|
-
list_detectors_by_class[_class_] = valid_detectors_set
|
561
|
+
list_detectors_by_class[_class_] = np.array(valid_detectors_set)
|
603
562
|
|
604
563
|
# Notify the completion of detector generation for the classes.
|
605
564
|
if verbose:
|
@@ -608,7 +567,10 @@ class BNSA(Base):
|
|
608
567
|
f"successfully generated\033[0m"
|
609
568
|
)
|
610
569
|
# Saves the found detectors in the attribute for the class detectors.
|
611
|
-
self.
|
570
|
+
self._detectors = list_detectors_by_class
|
571
|
+
self._detectors_stack = np.array(
|
572
|
+
[np.stack(self._detectors[class_name]) for class_name in self.classes]
|
573
|
+
)
|
612
574
|
return self
|
613
575
|
|
614
576
|
def predict(self, X: npt.NDArray) -> Optional[npt.NDArray]:
|
@@ -628,11 +590,11 @@ class BNSA(Base):
|
|
628
590
|
* ``None``: If there are no detectors for the prediction.
|
629
591
|
"""
|
630
592
|
# If there are no detectors, Returns None.
|
631
|
-
if self.
|
593
|
+
if self._detectors is None:
|
632
594
|
return None
|
633
595
|
|
634
596
|
super()._check_and_raise_exceptions_predict(
|
635
|
-
X, len(self.
|
597
|
+
X, len(self._detectors[self.classes[0]][0]), "BNSA"
|
636
598
|
)
|
637
599
|
|
638
600
|
# Converts the entire array X to boolean.
|
@@ -640,37 +602,18 @@ class BNSA(Base):
|
|
640
602
|
X = X.astype(bool)
|
641
603
|
|
642
604
|
# Initializes an empty array that will store the predictions.
|
643
|
-
c =
|
605
|
+
c = []
|
644
606
|
# For each sample row in X.
|
645
607
|
for line in X:
|
646
608
|
class_found: bool = True
|
647
|
-
#
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
similarity_sum: float = 0
|
652
|
-
# Calculates the Hamming distance between the row and all detectors.
|
653
|
-
distances = cdist(
|
654
|
-
np.expand_dims(line, axis=0),
|
655
|
-
self.detectors[_class_],
|
656
|
-
metric="hamming",
|
657
|
-
)
|
658
|
-
|
659
|
-
# Check if any distance is below or equal to the threshold.
|
660
|
-
if np.any(distances <= self.aff_thresh):
|
661
|
-
class_found = False
|
662
|
-
else:
|
663
|
-
similarity_sum = np.sum(distances)
|
664
|
-
|
665
|
-
# If the sample passes through all detectors of a class, adds the class as a
|
666
|
-
# possible prediction and its average similarity.
|
667
|
-
if class_found:
|
668
|
-
possible_classes.append([_class_, similarity_sum / self.N])
|
669
|
-
|
609
|
+
# Class prediction based on detectors
|
610
|
+
class_index = bnsa_class_prediction(
|
611
|
+
line, self._detectors_stack, self.aff_thresh
|
612
|
+
)
|
670
613
|
# If belonging to one or more classes, adds the class with the greatest
|
671
614
|
# average distance
|
672
|
-
if
|
673
|
-
c
|
615
|
+
if class_index > -1:
|
616
|
+
c.append(self.classes[class_index])
|
674
617
|
class_found = True
|
675
618
|
else:
|
676
619
|
class_found = False
|
@@ -678,14 +621,14 @@ class BNSA(Base):
|
|
678
621
|
# If there is only one class and the sample is not classified, sets the
|
679
622
|
# output as non-self.
|
680
623
|
if not class_found and len(self.classes) == 1:
|
681
|
-
c
|
624
|
+
c.append("non-self")
|
682
625
|
# If the class cannot be identified by the detectors
|
683
626
|
elif not class_found:
|
684
|
-
|
627
|
+
self.__assign_class_to_non_self_sample(line, c)
|
685
628
|
|
686
|
-
return c
|
629
|
+
return np.array(c)
|
687
630
|
|
688
|
-
def __assign_class_to_non_self_sample(self, line
|
631
|
+
def __assign_class_to_non_self_sample(self, line: npt.NDArray, c: list):
|
689
632
|
"""
|
690
633
|
This function determines the class of a sample when all detectors classify it
|
691
634
|
as "non-self". Classification is performed using the ``max_average_difference``
|
@@ -702,51 +645,13 @@ class BNSA(Base):
|
|
702
645
|
"""
|
703
646
|
class_differences: dict = {}
|
704
647
|
for _class_ in self.classes:
|
648
|
+
distances = np.sum(line != self._detectors[_class_]) / self.N
|
705
649
|
# Assign the label to the class with the greatest distance from
|
706
650
|
# the nearest detector.
|
707
651
|
if self.no_label_sample_selection == "nearest_difference":
|
708
|
-
|
709
|
-
np.expand_dims(line, axis=0),
|
710
|
-
self.detectors[_class_],
|
711
|
-
metric="hamming",
|
712
|
-
).min()
|
713
|
-
class_differences[_class_] = difference_min
|
652
|
+
class_differences[_class_] = distances.min()
|
714
653
|
# Or based on the greatest distance from the average distances of the detectors.
|
715
654
|
else:
|
716
|
-
|
717
|
-
np.expand_dims(line, axis=0),
|
718
|
-
self.detectors[_class_],
|
719
|
-
metric="hamming",
|
720
|
-
).sum()
|
721
|
-
class_differences[_class_] = difference_sum / self.N
|
722
|
-
|
723
|
-
return np.append(c, [max(class_differences, key=class_differences.get)])
|
724
|
-
|
725
|
-
def __slice_index_list_by_class(self, y: npt.NDArray) -> dict:
|
726
|
-
"""
|
727
|
-
The function ``__slice_index_list_by_class(...)``, separates the indices of the lines
|
728
|
-
according to the output class, to loop through the sample array, only in positions where
|
729
|
-
the output is the class being trained.
|
655
|
+
class_differences[_class_] = distances.sum() / self.N
|
730
656
|
|
731
|
-
|
732
|
-
----------
|
733
|
-
* y (``npt.NDArray``):
|
734
|
-
Receives a ``y``[``N sample``] array with the output classes of the ``X``
|
735
|
-
sample array.
|
736
|
-
|
737
|
-
Returns
|
738
|
-
----------
|
739
|
-
* dict: A dictionary with the list of array positions(``y``), with the classes as key.
|
740
|
-
"""
|
741
|
-
return slice_index_list_by_class(self.classes, y)
|
742
|
-
|
743
|
-
def get_params(self, deep: bool = True) -> dict: # pylint: disable=W0613
|
744
|
-
"""
|
745
|
-
The get_params function Returns a dictionary with the object's main parameters.
|
746
|
-
"""
|
747
|
-
return {
|
748
|
-
"N": self.N,
|
749
|
-
"aff_thresh": self.aff_thresh,
|
750
|
-
"max_discards": self.max_discards,
|
751
|
-
"seed": self.seed,
|
752
|
-
}
|
657
|
+
c.append(max(class_differences, key=class_differences.get))
|