aisp 0.1.35__py3-none-any.whl → 0.1.40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aisp/__init__.py +1 -1
- aisp/base/__init__.py +4 -0
- aisp/base/_classifier.py +90 -0
- aisp/nsa/__init__.py +1 -1
- aisp/nsa/_base.py +26 -120
- aisp/nsa/_negative_selection.py +110 -180
- aisp/nsa/_ns_core.py +153 -0
- aisp/utils/_multiclass.py +1 -0
- aisp/utils/distance.py +215 -0
- aisp/utils/metrics.py +2 -2
- aisp/utils/sanitizers.py +3 -2
- {aisp-0.1.35.dist-info → aisp-0.1.40.dist-info}/METADATA +10 -6
- aisp-0.1.40.dist-info/RECORD +18 -0
- {aisp-0.1.35.dist-info → aisp-0.1.40.dist-info}/WHEEL +1 -1
- aisp-0.1.35.dist-info/RECORD +0 -14
- {aisp-0.1.35.dist-info → aisp-0.1.40.dist-info}/licenses/LICENSE +0 -0
- {aisp-0.1.35.dist-info → aisp-0.1.40.dist-info}/top_level.txt +0 -0
aisp/nsa/_negative_selection.py
CHANGED
@@ -1,22 +1,30 @@
|
|
1
1
|
"""Negative Selection Algorithm."""
|
2
2
|
|
3
|
-
from collections import namedtuple
|
4
3
|
from typing import Dict, Literal, Optional, Union
|
5
|
-
from scipy.spatial.distance import cdist
|
6
4
|
from tqdm import tqdm
|
7
5
|
|
8
6
|
import numpy as np
|
9
7
|
import numpy.typing as npt
|
10
8
|
|
9
|
+
from ._ns_core import (
|
10
|
+
check_detector_bnsa_validity,
|
11
|
+
bnsa_class_prediction,
|
12
|
+
check_detector_rnsa_validity,
|
13
|
+
)
|
11
14
|
from ..exceptions import MaxDiscardsReachedError
|
12
15
|
from ..utils import slice_index_list_by_class
|
16
|
+
from ..utils.distance import (
|
17
|
+
min_distance_to_class_vectors,
|
18
|
+
get_metric_code,
|
19
|
+
compute_metric_distance,
|
20
|
+
)
|
13
21
|
from ..utils.sanitizers import sanitize_seed, sanitize_choice, sanitize_param
|
14
|
-
from ._base import
|
22
|
+
from ._base import BaseNSA, Detector
|
15
23
|
|
16
24
|
|
17
|
-
class RNSA(
|
25
|
+
class RNSA(BaseNSA):
|
18
26
|
"""
|
19
|
-
The ``RNSA`` (Real-Valued Negative Selection Algorithm) class is for classification and
|
27
|
+
The ``RNSA`` (Real-Valued Negative Selection Algorithm) class is for classification and
|
20
28
|
identification purposes. of anomalies through the self and not self method.
|
21
29
|
|
22
30
|
Parameters
|
@@ -64,7 +72,7 @@ class RNSA(Base):
|
|
64
72
|
Notes
|
65
73
|
----------
|
66
74
|
[1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.minkowski_distance.html
|
67
|
-
|
75
|
+
|
68
76
|
[2] https://doi.org/10.1007/978-3-540-24854-5_30
|
69
77
|
|
70
78
|
"""
|
@@ -79,15 +87,9 @@ class RNSA(Base):
|
|
79
87
|
max_discards: int = 1000,
|
80
88
|
seed: int = None,
|
81
89
|
algorithm: Literal["default-NSA", "V-detector"] = "default-NSA",
|
82
|
-
**kwargs: Dict[str, Union[bool, str, float]]
|
90
|
+
**kwargs: Dict[str, Union[bool, str, float]],
|
83
91
|
):
|
84
|
-
|
85
|
-
|
86
|
-
self.metric = sanitize_choice(
|
87
|
-
metric,
|
88
|
-
["manhattan", "minkowski"],
|
89
|
-
"euclidean"
|
90
|
-
)
|
92
|
+
self.metric = sanitize_choice(metric, ["manhattan", "minkowski"], "euclidean")
|
91
93
|
self.seed = sanitize_seed(seed)
|
92
94
|
if self.seed is not None:
|
93
95
|
np.random.seed(seed)
|
@@ -95,28 +97,23 @@ class RNSA(Base):
|
|
95
97
|
self.N: int = sanitize_param(N, 100, lambda x: x >= 1)
|
96
98
|
self.r: float = sanitize_param(r, 0.05, lambda x: x > 0)
|
97
99
|
self.r_s: float = sanitize_param(r_s, 0.0001, lambda x: x > 0)
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
self._algorithm: str = algorithm
|
102
|
-
else:
|
103
|
-
self._detector = namedtuple("Detector", "position")
|
104
|
-
self._algorithm: str = "default-NSA"
|
105
|
-
|
100
|
+
self.algorithm: str = sanitize_param(
|
101
|
+
algorithm, "default-NSA", lambda x: x == "V-detector"
|
102
|
+
)
|
106
103
|
self.max_discards: int = sanitize_param(max_discards, 1000, lambda x: x > 0)
|
107
104
|
|
108
105
|
# Retrieves the variables from kwargs.
|
109
106
|
self.p: float = kwargs.get("p", 2)
|
110
|
-
self.
|
107
|
+
self.cell_bounds: bool = kwargs.get("cell_bounds", False)
|
111
108
|
self.non_self_label: str = kwargs.get("non_self_label", "non-self")
|
112
109
|
|
113
110
|
# Initializes the other class variables as None.
|
114
|
-
self.
|
111
|
+
self._detectors: Union[dict, None] = None
|
115
112
|
self.classes: npt.NDArray = None
|
116
113
|
|
117
114
|
def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True):
|
118
115
|
"""
|
119
|
-
The function ``fit(...)``, performs the training according to ``X`` and ``y``, using the
|
116
|
+
The function ``fit(...)``, performs the training according to ``X`` and ``y``, using the
|
120
117
|
method negative selection method(``NegativeSelect``).
|
121
118
|
|
122
119
|
Parameters
|
@@ -125,7 +122,13 @@ class RNSA(Base):
|
|
125
122
|
characteristics, [``N samples`` (rows)][``N features`` (columns)].
|
126
123
|
* y (``npt.NDArray``): Array of target classes of ``X`` with [``N samples`` (lines)].
|
127
124
|
verbose (``bool``): Feedback from detector generation to the user.
|
128
|
-
|
125
|
+
|
126
|
+
Raises
|
127
|
+
----------
|
128
|
+
* TypeError: If X or y are not ndarrays or have incompatible shapes.
|
129
|
+
* MaxDiscardsReachedError: The maximum number of detector discards was reached during
|
130
|
+
maturation. Check the defined radius value and consider reducing it.
|
131
|
+
|
129
132
|
Returns
|
130
133
|
----------
|
131
134
|
* (``self``): Returns the instance itself.
|
@@ -150,6 +153,7 @@ class RNSA(Base):
|
|
150
153
|
# Initializes the empty set that will contain the valid detectors.
|
151
154
|
valid_detectors_set = []
|
152
155
|
discard_count = 0
|
156
|
+
x_class = X[sample_index[_class_]]
|
153
157
|
# Indicating which class the algorithm is currently processing for the progress bar.
|
154
158
|
if verbose:
|
155
159
|
progress.set_description_str(
|
@@ -159,21 +163,13 @@ class RNSA(Base):
|
|
159
163
|
# Generates a candidate detector vector randomly with values between 0 and 1.
|
160
164
|
vector_x = np.random.random_sample(size=X.shape[1])
|
161
165
|
# Checks the validity of the detector for non-self with respect to the class samples
|
162
|
-
valid_detector = self.__checks_valid_detector(
|
163
|
-
X=X, vector_x=vector_x, samples_index_class=sample_index[_class_]
|
164
|
-
)
|
166
|
+
valid_detector = self.__checks_valid_detector(x_class, vector_x)
|
165
167
|
|
166
168
|
# If the detector is valid, add it to the list of valid detectors.
|
167
|
-
if
|
168
|
-
discard_count = 0
|
169
|
-
valid_detectors_set.append(
|
170
|
-
self._detector(vector_x, valid_detector[1])
|
171
|
-
)
|
172
|
-
if verbose:
|
173
|
-
progress.update(1)
|
174
|
-
elif valid_detector:
|
169
|
+
if valid_detector is not False:
|
175
170
|
discard_count = 0
|
176
|
-
|
171
|
+
radius = valid_detector[1] if self.algorithm == "V-detector" else None
|
172
|
+
valid_detectors_set.append(Detector(vector_x, radius))
|
177
173
|
if verbose:
|
178
174
|
progress.update(1)
|
179
175
|
else:
|
@@ -182,7 +178,7 @@ class RNSA(Base):
|
|
182
178
|
raise MaxDiscardsReachedError(_class_)
|
183
179
|
|
184
180
|
# Add detectors, with classes as keys in the dictionary.
|
185
|
-
list_detectors_by_class[_class_] = valid_detectors_set
|
181
|
+
list_detectors_by_class[_class_] = np.array(valid_detectors_set)
|
186
182
|
# Notify completion of detector generation for the classes.
|
187
183
|
if verbose:
|
188
184
|
progress.set_description(
|
@@ -190,7 +186,7 @@ class RNSA(Base):
|
|
190
186
|
f"successfully generated\033[0m"
|
191
187
|
)
|
192
188
|
# Saves the found detectors in the attribute for the non-self detectors of the trained model
|
193
|
-
self.
|
189
|
+
self._detectors = list_detectors_by_class
|
194
190
|
return self
|
195
191
|
|
196
192
|
def predict(self, X: npt.NDArray) -> Optional[npt.NDArray]:
|
@@ -204,6 +200,13 @@ class RNSA(Base):
|
|
204
200
|
Array with input samples with [``N samples`` (Lines)] and
|
205
201
|
[``N characteristics``(Columns)]
|
206
202
|
|
203
|
+
Raises
|
204
|
+
----------
|
205
|
+
* TypeError
|
206
|
+
If X is not an ndarray or list.
|
207
|
+
* FeatureDimensionMismatch
|
208
|
+
If the number of features in X does not match the expected number.
|
209
|
+
|
207
210
|
Returns
|
208
211
|
----------
|
209
212
|
* C (``npt.NDArray``)
|
@@ -213,15 +216,15 @@ class RNSA(Base):
|
|
213
216
|
If there are no detectors for the prediction.
|
214
217
|
"""
|
215
218
|
# If there are no detectors, Returns None.
|
216
|
-
if self.
|
219
|
+
if self._detectors is None:
|
217
220
|
return None
|
218
221
|
|
219
222
|
super()._check_and_raise_exceptions_predict(
|
220
|
-
X, len(self.
|
223
|
+
X, len(self._detectors[self.classes[0]][0].position)
|
221
224
|
)
|
222
225
|
|
223
226
|
# Initializes an empty array that will store the predictions.
|
224
|
-
c =
|
227
|
+
c = []
|
225
228
|
# For each sample row in X.
|
226
229
|
for line in X:
|
227
230
|
class_found: bool
|
@@ -229,31 +232,31 @@ class RNSA(Base):
|
|
229
232
|
if _class_ is None:
|
230
233
|
class_found = False
|
231
234
|
else:
|
232
|
-
c
|
235
|
+
c.append(_class_)
|
233
236
|
class_found = True
|
234
237
|
|
235
238
|
# If there is only one class and the sample is not classified,
|
236
239
|
# set the output as non-self.
|
237
240
|
if not class_found and len(self.classes) == 1:
|
238
|
-
c
|
241
|
+
c.append(self.non_self_label)
|
239
242
|
# If the class is not identified with the detectors, assign the class with
|
240
243
|
# the greatest distance from the mean of its detectors.
|
241
244
|
elif not class_found:
|
242
245
|
average_distance: dict = {}
|
243
246
|
for _class_ in self.classes:
|
244
247
|
detectores = list(
|
245
|
-
map(lambda x: x.position, self.
|
248
|
+
map(lambda x: x.position, self._detectors[_class_])
|
246
249
|
)
|
247
250
|
average_distance[_class_] = np.average(
|
248
251
|
[self.__distance(detector, line) for detector in detectores]
|
249
252
|
)
|
250
|
-
c
|
251
|
-
return c
|
253
|
+
c.append(max(average_distance, key=average_distance.get))
|
254
|
+
return np.array(c)
|
252
255
|
|
253
256
|
def __slice_index_list_by_class(self, y: npt.NDArray) -> dict:
|
254
257
|
"""
|
255
|
-
The function ``__slice_index_list_by_class(...)``, separates the indices of the lines
|
256
|
-
according to the output class, to loop through the sample array, only in positions where
|
258
|
+
The function ``__slice_index_list_by_class(...)``, separates the indices of the lines
|
259
|
+
according to the output class, to loop through the sample array, only in positions where
|
257
260
|
the output is the class being trained.
|
258
261
|
|
259
262
|
Parameters
|
@@ -269,55 +272,45 @@ class RNSA(Base):
|
|
269
272
|
return slice_index_list_by_class(self.classes, y)
|
270
273
|
|
271
274
|
def __checks_valid_detector(
|
272
|
-
self,
|
273
|
-
X: npt.NDArray = None,
|
274
|
-
vector_x: npt.NDArray = None,
|
275
|
-
samples_index_class: npt.NDArray = None
|
275
|
+
self, x_class: npt.NDArray = None, vector_x: npt.NDArray = None
|
276
276
|
) -> Union[bool, tuple[bool, float]]:
|
277
277
|
"""
|
278
278
|
Function to check if the detector has a valid non-proper ``r`` radius for the class.
|
279
279
|
|
280
280
|
Parameters
|
281
281
|
----------
|
282
|
-
*
|
283
|
-
Array ``
|
282
|
+
* x_class (``npt.NDArray``)
|
283
|
+
Array ``x_class`` with the samples per class.
|
284
284
|
* vector_x (``npt.NDArray``)
|
285
285
|
Randomly generated vector x candidate detector with values between[0, 1].
|
286
|
-
* samples_index_class (``npt.NDArray``)
|
287
|
-
Sample positions of a class in ``X``.
|
288
286
|
|
289
287
|
Returns
|
290
288
|
----------
|
291
289
|
* Validity (``bool``): Returns whether the detector is valid or not.
|
292
290
|
"""
|
293
291
|
# If any of the input arrays have zero size, Returns false.
|
294
|
-
if (
|
295
|
-
np.size(samples_index_class) == 0
|
296
|
-
or np.size(X) == 0
|
297
|
-
or np.size(vector_x) == 0
|
298
|
-
):
|
292
|
+
if np.size(x_class) == 0 or np.size(vector_x) == 0:
|
299
293
|
return False
|
300
294
|
# If self.k > 1, uses the k nearest neighbors (kNN); otherwise, checks the detector
|
301
295
|
# without considering kNN.
|
302
296
|
if self.k > 1:
|
303
297
|
knn_list = np.empty(shape=0)
|
304
|
-
for
|
298
|
+
for x in x_class:
|
305
299
|
# Calculates the distance between the two vectors and adds it to the kNN list if
|
306
300
|
# the distance is smaller than the largest distance in the list.
|
307
301
|
knn_list = self.__compare_knearest_neighbors_list(
|
308
|
-
knn_list, self.__distance(
|
302
|
+
knn_list, self.__distance(x, vector_x)
|
309
303
|
)
|
310
304
|
# If the average of the distances in the kNN list is less than the radius, Returns true.
|
311
305
|
distance_mean = np.mean(knn_list)
|
312
|
-
if self.
|
306
|
+
if self.algorithm == "V-detector":
|
313
307
|
return self.__detector_is_valid_to_vdetector(distance_mean, vector_x)
|
314
308
|
if distance_mean > (self.r + self.r_s):
|
315
309
|
return True
|
316
310
|
else:
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
self.__distance(X[i], vector_x) for i in samples_index_class
|
311
|
+
if self.algorithm == "V-detector":
|
312
|
+
distance = min_distance_to_class_vectors(
|
313
|
+
x_class, vector_x, get_metric_code(self.metric), self.p
|
321
314
|
)
|
322
315
|
return self.__detector_is_valid_to_vdetector(distance, vector_x)
|
323
316
|
|
@@ -325,15 +318,15 @@ class RNSA(Base):
|
|
325
318
|
# the radius plus the sample's radius, sets the validity of the detector to
|
326
319
|
# true.
|
327
320
|
threshold: float = self.r + self.r_s
|
328
|
-
if
|
329
|
-
|
321
|
+
if check_detector_rnsa_validity(
|
322
|
+
x_class, vector_x, threshold, get_metric_code(self.metric), self.p
|
323
|
+
):
|
324
|
+
return True # Detector is valid!
|
330
325
|
|
331
326
|
return False # Detector is not valid!
|
332
327
|
|
333
328
|
def __compare_knearest_neighbors_list(
|
334
|
-
self,
|
335
|
-
knn: npt.NDArray,
|
336
|
-
distance: float
|
329
|
+
self, knn: npt.NDArray, distance: float
|
337
330
|
) -> npt.NDArray:
|
338
331
|
"""
|
339
332
|
Compares the k-nearest neighbor distance at position ``k-1`` in the list ``knn``,
|
@@ -384,10 +377,10 @@ class RNSA(Base):
|
|
384
377
|
# Variable to indicate if the class was found with the detectors.
|
385
378
|
class_found: bool = True
|
386
379
|
sum_distance = 0
|
387
|
-
for detector in self.
|
380
|
+
for detector in self._detectors[_class_]:
|
388
381
|
distance = self.__distance(detector.position, line)
|
389
382
|
sum_distance += distance
|
390
|
-
if self.
|
383
|
+
if self.algorithm == "V-detector":
|
391
384
|
if distance <= detector.radius:
|
392
385
|
class_found = False
|
393
386
|
break
|
@@ -421,12 +414,10 @@ class RNSA(Base):
|
|
421
414
|
----------
|
422
415
|
* Distance (``float``): between the two points.
|
423
416
|
"""
|
424
|
-
return
|
417
|
+
return compute_metric_distance(u, v, get_metric_code(self.metric), self.p)
|
425
418
|
|
426
419
|
def __detector_is_valid_to_vdetector(
|
427
|
-
self,
|
428
|
-
distance: float,
|
429
|
-
vector_x: npt.NDArray
|
420
|
+
self, distance: float, vector_x: npt.NDArray
|
430
421
|
) -> Union[bool, tuple[bool, float]]:
|
431
422
|
"""
|
432
423
|
Check if the distance between the detector and the samples, minus the radius of the samples,
|
@@ -449,33 +440,17 @@ class RNSA(Base):
|
|
449
440
|
return False
|
450
441
|
|
451
442
|
# If _cell_bounds is True, considers the detector to be within the plane bounds.
|
452
|
-
if self.
|
443
|
+
if self.cell_bounds:
|
453
444
|
for p in vector_x:
|
454
445
|
if (p - new_detector_r) < 0 or (p + new_detector_r) > 1:
|
455
446
|
return False
|
456
447
|
|
457
448
|
return (True, new_detector_r)
|
458
449
|
|
459
|
-
|
460
|
-
|
461
|
-
The get_params function Returns a dictionary with the object's main parameters.
|
462
|
-
"""
|
463
|
-
return {
|
464
|
-
"N": self.N,
|
465
|
-
"r": self.r,
|
466
|
-
"k": self.k,
|
467
|
-
"metric": self.metric,
|
468
|
-
"seed": self.seed,
|
469
|
-
"algorithm": self._algorithm,
|
470
|
-
"r_s": self.r_s,
|
471
|
-
"cell_bounds": self._cell_bounds,
|
472
|
-
"p": self.p,
|
473
|
-
}
|
474
|
-
|
475
|
-
|
476
|
-
class BNSA(Base):
|
450
|
+
|
451
|
+
class BNSA(BaseNSA):
|
477
452
|
"""
|
478
|
-
The ``BNSA`` (Binary Negative Selection Algorithm) class is for classification and
|
453
|
+
The ``BNSA`` (Binary Negative Selection Algorithm) class is for classification and
|
479
454
|
identification purposes of anomalies through the self and not self method.
|
480
455
|
|
481
456
|
Parameters
|
@@ -505,7 +480,7 @@ class BNSA(Base):
|
|
505
480
|
seed: int = None,
|
506
481
|
no_label_sample_selection: Literal[
|
507
482
|
"max_average_difference", "max_nearest_difference"
|
508
|
-
] = "max_average_difference"
|
483
|
+
] = "max_average_difference",
|
509
484
|
):
|
510
485
|
super().__init__()
|
511
486
|
|
@@ -521,15 +496,16 @@ class BNSA(Base):
|
|
521
496
|
self.no_label_sample_selection: float = sanitize_param(
|
522
497
|
no_label_sample_selection,
|
523
498
|
"max_average_difference",
|
524
|
-
lambda x: x == "nearest_difference"
|
499
|
+
lambda x: x == "nearest_difference",
|
525
500
|
)
|
526
501
|
|
527
502
|
self.classes: npt.NDArray = None
|
528
|
-
self.
|
503
|
+
self._detectors: Optional[dict] = None
|
504
|
+
self._detectors_stack: npt.NDArray = None
|
529
505
|
|
530
506
|
def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True):
|
531
507
|
"""
|
532
|
-
The function ``fit(...)``, performs the training according to ``X`` and ``y``, using the
|
508
|
+
The function ``fit(...)``, performs the training according to ``X`` and ``y``, using the
|
533
509
|
method negative selection method(``NegativeSelect``).
|
534
510
|
|
535
511
|
Parameters
|
@@ -540,7 +516,7 @@ class BNSA(Base):
|
|
540
516
|
* y (``npt.NDArray``):
|
541
517
|
Array of target classes of ``X`` with [``N samples`` (lines)].
|
542
518
|
verbose (``bool``): Feedback from detector generation to the user.
|
543
|
-
|
519
|
+
|
544
520
|
Returns
|
545
521
|
----------
|
546
522
|
* (``self``): Returns the instance itself.
|
@@ -548,8 +524,7 @@ class BNSA(Base):
|
|
548
524
|
super()._check_and_raise_exceptions_fit(X, y, "BNSA")
|
549
525
|
|
550
526
|
# Converts the entire array X to boolean
|
551
|
-
|
552
|
-
X = X.astype(bool)
|
527
|
+
X = X.astype(np.bool_)
|
553
528
|
|
554
529
|
# Identifying the possible classes within the output array `y`.
|
555
530
|
self.classes = np.unique(y)
|
@@ -574,21 +549,12 @@ class BNSA(Base):
|
|
574
549
|
progress.set_description_str(
|
575
550
|
f"Generating the detectors for the {_class_} class:"
|
576
551
|
)
|
552
|
+
x_class = X[sample_index[_class_]]
|
577
553
|
while len(valid_detectors_set) < self.N:
|
578
|
-
is_valid_detector: bool = True
|
579
554
|
# Generates a candidate detector vector randomly with values 0 and 1.
|
580
|
-
vector_x = np.random.
|
581
|
-
# Calculates the distance between the candidate and the class samples.
|
582
|
-
distances = cdist(
|
583
|
-
np.expand_dims(vector_x, axis=0),
|
584
|
-
X[sample_index[_class_]],
|
585
|
-
metric="hamming",
|
586
|
-
)
|
587
|
-
# Checks if any of the distances is below or equal to the threshold.
|
588
|
-
is_valid_detector = not np.any(distances <= self.aff_thresh)
|
589
|
-
|
555
|
+
vector_x = np.random.randint(0, 2, size=X.shape[1]).astype(np.bool_)
|
590
556
|
# If the detector is valid, add it to the list of valid detectors.
|
591
|
-
if
|
557
|
+
if check_detector_bnsa_validity(x_class, vector_x, self.aff_thresh):
|
592
558
|
discard_count = 0
|
593
559
|
valid_detectors_set.append(vector_x)
|
594
560
|
if verbose:
|
@@ -599,7 +565,7 @@ class BNSA(Base):
|
|
599
565
|
raise MaxDiscardsReachedError(_class_)
|
600
566
|
|
601
567
|
# Add detectors to the dictionary with classes as keys.
|
602
|
-
list_detectors_by_class[_class_] = valid_detectors_set
|
568
|
+
list_detectors_by_class[_class_] = np.array(valid_detectors_set)
|
603
569
|
|
604
570
|
# Notify the completion of detector generation for the classes.
|
605
571
|
if verbose:
|
@@ -608,7 +574,10 @@ class BNSA(Base):
|
|
608
574
|
f"successfully generated\033[0m"
|
609
575
|
)
|
610
576
|
# Saves the found detectors in the attribute for the class detectors.
|
611
|
-
self.
|
577
|
+
self._detectors = list_detectors_by_class
|
578
|
+
self._detectors_stack = np.array(
|
579
|
+
[np.stack(self._detectors[class_name]) for class_name in self.classes]
|
580
|
+
)
|
612
581
|
return self
|
613
582
|
|
614
583
|
def predict(self, X: npt.NDArray) -> Optional[npt.NDArray]:
|
@@ -628,11 +597,11 @@ class BNSA(Base):
|
|
628
597
|
* ``None``: If there are no detectors for the prediction.
|
629
598
|
"""
|
630
599
|
# If there are no detectors, Returns None.
|
631
|
-
if self.
|
600
|
+
if self._detectors is None:
|
632
601
|
return None
|
633
602
|
|
634
603
|
super()._check_and_raise_exceptions_predict(
|
635
|
-
X, len(self.
|
604
|
+
X, len(self._detectors[self.classes[0]][0]), "BNSA"
|
636
605
|
)
|
637
606
|
|
638
607
|
# Converts the entire array X to boolean.
|
@@ -640,37 +609,18 @@ class BNSA(Base):
|
|
640
609
|
X = X.astype(bool)
|
641
610
|
|
642
611
|
# Initializes an empty array that will store the predictions.
|
643
|
-
c =
|
612
|
+
c = []
|
644
613
|
# For each sample row in X.
|
645
614
|
for line in X:
|
646
615
|
class_found: bool = True
|
647
|
-
#
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
similarity_sum: float = 0
|
652
|
-
# Calculates the Hamming distance between the row and all detectors.
|
653
|
-
distances = cdist(
|
654
|
-
np.expand_dims(line, axis=0),
|
655
|
-
self.detectors[_class_],
|
656
|
-
metric="hamming",
|
657
|
-
)
|
658
|
-
|
659
|
-
# Check if any distance is below or equal to the threshold.
|
660
|
-
if np.any(distances <= self.aff_thresh):
|
661
|
-
class_found = False
|
662
|
-
else:
|
663
|
-
similarity_sum = np.sum(distances)
|
664
|
-
|
665
|
-
# If the sample passes through all detectors of a class, adds the class as a
|
666
|
-
# possible prediction and its average similarity.
|
667
|
-
if class_found:
|
668
|
-
possible_classes.append([_class_, similarity_sum / self.N])
|
669
|
-
|
616
|
+
# Class prediction based on detectors
|
617
|
+
class_index = bnsa_class_prediction(
|
618
|
+
line, self._detectors_stack, self.aff_thresh
|
619
|
+
)
|
670
620
|
# If belonging to one or more classes, adds the class with the greatest
|
671
621
|
# average distance
|
672
|
-
if
|
673
|
-
c
|
622
|
+
if class_index > -1:
|
623
|
+
c.append(self.classes[class_index])
|
674
624
|
class_found = True
|
675
625
|
else:
|
676
626
|
class_found = False
|
@@ -678,14 +628,14 @@ class BNSA(Base):
|
|
678
628
|
# If there is only one class and the sample is not classified, sets the
|
679
629
|
# output as non-self.
|
680
630
|
if not class_found and len(self.classes) == 1:
|
681
|
-
c
|
631
|
+
c.append("non-self")
|
682
632
|
# If the class cannot be identified by the detectors
|
683
633
|
elif not class_found:
|
684
|
-
|
634
|
+
self.__assign_class_to_non_self_sample(line, c)
|
685
635
|
|
686
|
-
return c
|
636
|
+
return np.array(c)
|
687
637
|
|
688
|
-
def __assign_class_to_non_self_sample(self, line
|
638
|
+
def __assign_class_to_non_self_sample(self, line: npt.NDArray, c: list):
|
689
639
|
"""
|
690
640
|
This function determines the class of a sample when all detectors classify it
|
691
641
|
as "non-self". Classification is performed using the ``max_average_difference``
|
@@ -702,30 +652,21 @@ class BNSA(Base):
|
|
702
652
|
"""
|
703
653
|
class_differences: dict = {}
|
704
654
|
for _class_ in self.classes:
|
655
|
+
distances = np.sum(line != self._detectors[_class_]) / self.N
|
705
656
|
# Assign the label to the class with the greatest distance from
|
706
657
|
# the nearest detector.
|
707
658
|
if self.no_label_sample_selection == "nearest_difference":
|
708
|
-
|
709
|
-
np.expand_dims(line, axis=0),
|
710
|
-
self.detectors[_class_],
|
711
|
-
metric="hamming",
|
712
|
-
).min()
|
713
|
-
class_differences[_class_] = difference_min
|
659
|
+
class_differences[_class_] = distances.min()
|
714
660
|
# Or based on the greatest distance from the average distances of the detectors.
|
715
661
|
else:
|
716
|
-
|
717
|
-
np.expand_dims(line, axis=0),
|
718
|
-
self.detectors[_class_],
|
719
|
-
metric="hamming",
|
720
|
-
).sum()
|
721
|
-
class_differences[_class_] = difference_sum / self.N
|
662
|
+
class_differences[_class_] = distances.sum() / self.N
|
722
663
|
|
723
|
-
|
664
|
+
c.append(max(class_differences, key=class_differences.get))
|
724
665
|
|
725
666
|
def __slice_index_list_by_class(self, y: npt.NDArray) -> dict:
|
726
667
|
"""
|
727
|
-
The function ``__slice_index_list_by_class(...)``, separates the indices of the lines
|
728
|
-
according to the output class, to loop through the sample array, only in positions where
|
668
|
+
The function ``__slice_index_list_by_class(...)``, separates the indices of the lines
|
669
|
+
according to the output class, to loop through the sample array, only in positions where
|
729
670
|
the output is the class being trained.
|
730
671
|
|
731
672
|
Parameters
|
@@ -739,14 +680,3 @@ class BNSA(Base):
|
|
739
680
|
* dict: A dictionary with the list of array positions(``y``), with the classes as key.
|
740
681
|
"""
|
741
682
|
return slice_index_list_by_class(self.classes, y)
|
742
|
-
|
743
|
-
def get_params(self, deep: bool = True) -> dict: # pylint: disable=W0613
|
744
|
-
"""
|
745
|
-
The get_params function Returns a dictionary with the object's main parameters.
|
746
|
-
"""
|
747
|
-
return {
|
748
|
-
"N": self.N,
|
749
|
-
"aff_thresh": self.aff_thresh,
|
750
|
-
"max_discards": self.max_discards,
|
751
|
-
"seed": self.seed,
|
752
|
-
}
|