aisp 0.1.33__py3-none-any.whl → 0.1.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,752 @@
1
+ """Negative Selection Algorithm."""
2
+
3
+ from collections import namedtuple
4
+ from typing import Dict, Literal, Optional, Union
5
+ from scipy.spatial.distance import cdist
6
+ from tqdm import tqdm
7
+
8
+ import numpy as np
9
+ import numpy.typing as npt
10
+
11
+ from ..exceptions import MaxDiscardsReachedError
12
+ from ..utils import slice_index_list_by_class
13
+ from ..utils.sanitizers import sanitize_seed, sanitize_choice, sanitize_param
14
+ from ._base import Base
15
+
16
+
17
class RNSA(Base):
    """
    The ``RNSA`` (Real-Valued Negative Selection Algorithm) class is for classification and
    identification of anomalies through the self and not-self method.

    Parameters
    ----------
    * N (``int``): Number of detectors. Defaults to ``100``.
    * r (``float``): Radius of the detector. Defaults to ``0.05``.
    * r_s (``float``): rₛ Radius of the ``X`` own samples. Defaults to ``0.0001``.
    * k (``int``): Number of neighbors near the randomly generated detectors to perform the
        distance average calculation. Defaults to ``1``.
    * metric (``str``): Way to calculate the distance between the detector and the sample:
        + ``'euclidean'`` ➜ The calculation of the distance is given by the expression:
            √( (X₁ – Y₁)² + (X₂ – Y₂)² + ... + (Xn – Yn)²).
        + ``'minkowski'`` ➜ The calculation of the distance is given by the expression:
            ( |X₁ – Y₁|p + |X₂ – Y₂|p + ... + |Xn – Yn|p) ¹/ₚ.
        + ``'manhattan'`` ➜ The calculation of the distance is given by the expression:
            ( |X₁ – Y₁| + |X₂ – Y₂| + ... + |Xn – Yn|).

        Defaults to ``'euclidean'``.
    * max_discards (``int``): This parameter indicates the maximum number of consecutive
        detector discards, aimed at preventing a possible infinite loop in case a radius
        is defined that cannot generate non-self detectors. Defaults to ``1000``.
    * seed (``int``): Seed for the random generation of values in the detectors. Defaults to
        ``None``.
    * algorithm (``str``): Set the algorithm version:
        * ``'default-NSA'``: Default algorithm with fixed radius.
        * ``'V-detector'``: This algorithm is based on the article
            [Real-Valued Negative Selection Algorithm with Variable-Sized Detectors][2],
            by Ji, Z., Dasgupta, D. (2004), and uses a variable radius for anomaly
            detection in feature spaces.

        Any value other than ``'V-detector'`` selects ``'default-NSA'``.
        Defaults to ``'default-NSA'``.

    * ``**kwargs``:
        - non_self_label (``str``): This variable stores the label that will be assigned
            when the data has only one output class, and the sample is classified as not
            belonging to that class. Defaults to ``'non-self'``.
        - cell_bounds (``bool``): If set to ``True``, this option limits the generation
            of detectors to the space within the plane between 0 and 1. This means that
            any detector whose radius exceeds this limit is discarded, this variable is
            only used in the ``V-detector`` algorithm. Defaults to ``False``.
        - p (``float``): This parameter stores the value of ``p`` used in the Minkowski
            distance. The default is ``2``, which represents normalized Euclidean distance.
            Different values of p lead to different variants of the [Minkowski Distance][1].

    Notes
    ----------
    [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.minkowski_distance.html

    [2] https://doi.org/10.1007/978-3-540-24854-5_30

    """

    def __init__(
        self,
        N: int = 100,
        r: float = 0.05,
        r_s: float = 0.0001,
        k: int = 1,
        metric: Literal["manhattan", "minkowski", "euclidean"] = "euclidean",
        max_discards: int = 1000,
        seed: Optional[int] = None,
        algorithm: Literal["default-NSA", "V-detector"] = "default-NSA",
        **kwargs: Union[bool, str, float]
    ):
        super().__init__(metric)

        self.metric = sanitize_choice(
            metric,
            ["manhattan", "minkowski"],
            "euclidean"
        )
        self.seed = sanitize_seed(seed)
        if self.seed is not None:
            np.random.seed(self.seed)
        # NOTE(review): the sanitizers are defined outside this file; they presumably return
        # the given value when the predicate accepts it and the default otherwise - confirm.
        # ``k = 1`` is the documented default, so it is accepted explicitly.
        self.k: int = sanitize_param(k, 1, lambda x: x >= 1)
        self.N: int = sanitize_param(N, 100, lambda x: x >= 1)
        self.r: float = sanitize_param(r, 0.05, lambda x: x > 0)
        self.r_s: float = sanitize_param(r_s, 0.0001, lambda x: x > 0)

        # V-detector stores a radius per detector; the default algorithm only a position.
        if algorithm == "V-detector":
            self._detector = namedtuple("Detector", ["position", "radius"])
            self._algorithm: str = algorithm
        else:
            self._detector = namedtuple("Detector", "position")
            self._algorithm: str = "default-NSA"

        self.max_discards: int = sanitize_param(max_discards, 1000, lambda x: x > 0)

        # Retrieves the variables from kwargs.
        self.p: float = kwargs.get("p", 2)
        self._cell_bounds: bool = kwargs.get("cell_bounds", False)
        self.non_self_label: str = kwargs.get("non_self_label", "non-self")

        # Set by ``fit``: detectors per class and the array of known classes.
        self.detectors: Optional[dict] = None
        self.classes: Optional[npt.NDArray] = None

    def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True):
        """
        The function ``fit(...)``, performs the training according to ``X`` and ``y``, using
        the negative selection method (``NegativeSelect``).

        Parameters
        ----------
        * X (``npt.NDArray``): Training array, containing the samples and their
            characteristics, [``N samples`` (rows)][``N features`` (columns)].
        * y (``npt.NDArray``): Array of target classes of ``X`` with [``N samples`` (lines)].
        * verbose (``bool``): Feedback from detector generation to the user.

        Raises
        ----------
        * ``MaxDiscardsReachedError``: When ``max_discards`` consecutive candidate detectors
            are rejected for a class.

        Returns
        ----------
        * (``self``): Returns the instance itself.
        """
        progress = None
        super()._check_and_raise_exceptions_fit(X, y)

        # Identifying the possible classes within the output array `y`.
        self.classes = np.unique(y)
        # Dictionary that will store detectors with classes as keys.
        list_detectors_by_class = {}
        # Separates the classes for training.
        sample_index = self.__slice_index_list_by_class(y)
        # Progress bar for generating all detectors.
        if verbose:
            progress = tqdm(
                total=int(self.N * (len(self.classes))),
                bar_format="{desc} ┇{bar}┇ {n}/{total} detectors",
                postfix="\n",
            )
        for _class_ in self.classes:
            # Initializes the empty list that will contain the valid detectors.
            valid_detectors_set = []
            discard_count = 0
            # Indicating which class the algorithm is currently processing for the progress bar.
            if verbose:
                progress.set_description_str(
                    f"Generating the detectors for the {_class_} class:"
                )
            while len(valid_detectors_set) < self.N:
                # Generates a candidate detector vector randomly with values between 0 and 1.
                vector_x = np.random.random_sample(size=X.shape[1])
                # Checks the validity of the detector for non-self with respect to the class
                # samples. The result is ``False``, ``True`` or ``(True, radius)``.
                valid_detector = self.__checks_valid_detector(
                    X=X, vector_x=vector_x, samples_index_class=sample_index[_class_]
                )

                # Rejected candidate: count the consecutive discard and try again.
                if not valid_detector:
                    discard_count += 1
                    if discard_count == self.max_discards:
                        raise MaxDiscardsReachedError(_class_)
                    continue

                # Accepted candidate: the consecutive discard counter starts over.
                discard_count = 0
                if self._algorithm == "V-detector":
                    detector = self._detector(vector_x, valid_detector[1])
                else:
                    detector = self._detector(vector_x)
                valid_detectors_set.append(detector)
                if verbose:
                    progress.update(1)

            # Add detectors, with classes as keys in the dictionary.
            list_detectors_by_class[_class_] = valid_detectors_set
        # Notify completion of detector generation for the classes.
        if verbose:
            progress.set_description(
                f"\033[92m✔ Non-self detectors for classes ({', '.join(map(str, self.classes))}) "
                f"successfully generated\033[0m"
            )
        # Saves the found detectors in the attribute for the non-self detectors of the model.
        self.detectors = list_detectors_by_class
        return self

    def predict(self, X: npt.NDArray) -> Optional[npt.NDArray]:
        """
        Function to perform the prediction of classes based on detectors
        created after training.

        Parameters
        ----------
        * X (``npt.NDArray``)
            Array with input samples with [``N samples`` (Lines)] and
            [``N characteristics``(Columns)]

        Returns
        ----------
        * C (``npt.NDArray``)
            an ndarray of the form ``C`` [``N samples``], containing the predicted classes
            for ``X``.
        * ``None``
            If there are no detectors for the prediction.
        """
        # If there are no detectors, returns None.
        if self.detectors is None:
            return None

        super()._check_and_raise_exceptions_predict(
            X, len(self.detectors[self.classes[0]][0].position)
        )

        # Initializes an empty array that will store the predictions.
        c = np.empty(shape=0)
        # For each sample row in X.
        for line in X:
            _class_ = self.__compare_sample_to_detectors(line)
            if _class_ is not None:
                c = np.append(c, [_class_])
            # If there is only one class and the sample is not classified,
            # set the output as non-self.
            elif len(self.classes) == 1:
                c = np.append(c, [self.non_self_label])
            # If the class is not identified with the detectors, assign the class whose
            # detectors are, on average, the farthest from the sample.
            else:
                average_distance: dict = {
                    label: np.average(
                        [
                            self.__distance(detector.position, line)
                            for detector in self.detectors[label]
                        ]
                    )
                    for label in self.classes
                }
                c = np.append(c, [max(average_distance, key=average_distance.get)])
        return c

    def __slice_index_list_by_class(self, y: npt.NDArray) -> dict:
        """
        The function ``__slice_index_list_by_class(...)``, separates the indices of the lines
        according to the output class, to loop through the sample array, only in positions
        where the output is the class being trained.

        Parameters
        ----------
        * y (npt.NDArray)
            Receives a ``y``[``N sample``] array with the output classes of the
            ``X`` sample array.

        Returns
        ----------
        * dict: A dictionary with the list of array positions(``y``), with the classes as key.
        """
        return slice_index_list_by_class(self.classes, y)

    def __checks_valid_detector(
        self,
        X: npt.NDArray = None,
        vector_x: npt.NDArray = None,
        samples_index_class: npt.NDArray = None
    ) -> Union[bool, tuple[bool, float]]:
        """
        Function to check if the detector has a valid non-self ``r`` radius for the class.

        Parameters
        ----------
        * X (``npt.NDArray``)
            Array ``X`` with the samples.
        * vector_x (``npt.NDArray``)
            Randomly generated vector x candidate detector with values between[0, 1].
        * samples_index_class (``npt.NDArray``)
            Sample positions of a class in ``X``.

        Returns
        ----------
        * Validity (``bool``): Whether the detector is valid or not (``default-NSA``).
        * (``tuple[bool, float]``): ``(True, radius)`` for a valid detector when the
            algorithm is ``V-detector``; ``False`` otherwise.
        """
        # If any of the input arrays have zero size, returns false.
        if (
            np.size(samples_index_class) == 0
            or np.size(X) == 0
            or np.size(vector_x) == 0
        ):
            return False

        threshold: float = self.r + self.r_s

        # If self.k > 1, uses the k nearest neighbors (kNN); otherwise, checks the detector
        # without considering kNN.
        if self.k > 1:
            knn_list = np.empty(shape=0)
            for i in samples_index_class:
                # Keeps the k smallest distances between the candidate and the class samples.
                knn_list = self.__compare_knearest_neighbors_list(
                    knn_list, self.__distance(X[i], vector_x)
                )
            distance_mean = np.mean(knn_list)
            if self._algorithm == "V-detector":
                return self.__detector_is_valid_to_vdetector(distance_mean, vector_x)
            # The detector is valid only when the average of the kNN distances is greater
            # than the detector radius plus the sample radius.
            return bool(distance_mean > threshold)

        if self._algorithm == "V-detector":
            distance = min(
                self.__distance(X[i], vector_x) for i in samples_index_class
            )
            return self.__detector_is_valid_to_vdetector(distance, vector_x)

        # The detector is valid only when every sample of the class is farther away than
        # the detector radius plus the sample radius.
        return all(
            self.__distance(X[i], vector_x) > threshold for i in samples_index_class
        )

    def __compare_knearest_neighbors_list(
        self,
        knn: npt.NDArray,
        distance: float
    ) -> npt.NDArray:
        """
        Compares the k-nearest neighbor distance at position ``k-1`` in the list ``knn``,
        if the distance of the new sample is less, replace it and sort in ascending order.

        Parameters
        ----------
        * knn (``npt.NDArray``)
            List of k-nearest neighbor distances.
        * distance (``float``)
            Distance to check.

        Returns
        ----------
        * ``npt.NDArray``: Updated and sorted nearest neighbor list.
        """
        # If the number of distances in kNN is less than k, adds the distance.
        if len(knn) < self.k:
            knn = np.append(knn, distance)
            knn.sort()
            return knn

        # Otherwise, replace the largest stored distance (last position, the list is sorted)
        # when the new distance is smaller.
        if knn[self.k - 1] > distance:
            knn[self.k - 1] = distance
            knn.sort()

        return knn

    def __compare_sample_to_detectors(self, line: npt.NDArray):
        """
        Function to compare a sample with the detectors, verifying if the sample is proper.

        Parameters
        ----------
        * line (``npt.NDArray``): vector with N-features

        Returns
        ----------
        * Returns the predicted class with the detectors or None if the sample does not
            qualify for any class.
        """
        # List to store the classes and the average distance between the detectors and
        # the sample.
        possible_classes = []
        for _class_ in self.classes:
            # Stays True while no detector of the class covers the sample.
            class_found: bool = True
            sum_distance = 0
            for detector in self.detectors[_class_]:
                distance = self.__distance(detector.position, line)
                sum_distance += distance
                # V-detector uses the radius stored in each detector, default-NSA uses ``r``.
                if self._algorithm == "V-detector":
                    if distance <= detector.radius:
                        class_found = False
                        break
                elif distance <= self.r:
                    class_found = False
                    break

            # If the sample passes through all the detectors of a class, adds the class as a
            # possible prediction.
            if class_found:
                possible_classes.append([_class_, sum_distance / self.N])
        # If classified as belonging to only one class, returns the class.
        if len(possible_classes) == 1:
            return possible_classes[0][0]
        # If belonging to more than one class, returns the class with the greatest
        # average distance.
        if len(possible_classes) > 1:
            return max(possible_classes, key=lambda x: x[1])[0]

        return None

    def __distance(self, u: npt.NDArray, v: npt.NDArray) -> float:
        """
        Function to calculate the distance between two points by the chosen ``metric``.
        Delegates to ``_distance`` of the base class.

        Parameters
        ----------
        * u (``npt.NDArray``): Coordinates of the first point.
        * v (``npt.NDArray``): Coordinates of the second point.

        Returns
        ----------
        * Distance (``float``): between the two points.
        """
        return super()._distance(u, v)

    def __detector_is_valid_to_vdetector(
        self,
        distance: float,
        vector_x: npt.NDArray
    ) -> Union[bool, tuple[bool, float]]:
        """
        Check if the distance between the detector and the samples, minus the radius of the
        samples, is greater than the minimum radius.

        Parameters
        ----------
        * distance (``float``): minimum distance calculated between all samples.
        * vector_x (``numpy.ndarray``): randomly generated candidate detector vector x with
            values between 0 and 1.

        Returns
        ----------
        * ``False`` if the calculated radius is not greater than the minimum radius ``r`` or
            exceeds the edge of the space, if this option is enabled.
        * ``True`` and the distance minus the radius of the samples, if the radius is valid.
        """
        new_detector_r = float(distance - self.r_s)
        if self.r >= new_detector_r:
            return False

        # If _cell_bounds is True, the detector must lie entirely within [0, 1] on every axis.
        if self._cell_bounds:
            for p in vector_x:
                if (p - new_detector_r) < 0 or (p + new_detector_r) > 1:
                    return False

        return (True, new_detector_r)

    def get_params(self, deep: bool = True) -> dict:  # pylint: disable=W0613
        """
        The get_params function returns a dictionary with the object's main parameters.

        Every key is accepted by ``__init__`` (directly or through ``**kwargs``).
        """
        return {
            "N": self.N,
            "r": self.r,
            "k": self.k,
            "metric": self.metric,
            "seed": self.seed,
            "algorithm": self._algorithm,
            "r_s": self.r_s,
            "cell_bounds": self._cell_bounds,
            "p": self.p,
            "max_discards": self.max_discards,
            "non_self_label": self.non_self_label,
        }
474
+
475
+
476
class BNSA(Base):
    """
    The ``BNSA`` (Binary Negative Selection Algorithm) class is for classification and
    identification purposes of anomalies through the self and not self method.

    Parameters
    ----------
    * N (``int``): Number of detectors. Defaults to ``100``.
    * aff_thresh (``float``): The variable represents the percentage of similarity
        between the T cell and the own samples. The default value is 10% (0.1), while a value
        of 1.0 represents 100% similarity.
    * max_discards (``int``): This parameter indicates the maximum number of detector discards
        in sequence, which aims to avoid a possible infinite loop if a radius is defined that
        it is not possible to generate non-self detectors. Defaults to ``1000``.
    * seed (``int``): Seed for the random generation of values in the detectors. Defaults to
        ``None``.
    * no_label_sample_selection (``str``): Method for selecting labels for samples designated
        as non-self by all detectors. Available method types:
        - (``max_average_difference``): Selects the class with the highest average difference
            among the detectors. This is the default.
        - (``max_nearest_difference``): Selects the class with the highest difference to its
            nearest detector. The legacy spelling ``nearest_difference`` is also accepted.
    """

    # Spellings that select the nearest-detector strategy. ``"nearest_difference"`` is the
    # value the code historically compared against; ``"max_nearest_difference"`` is the
    # documented one. Both are honoured.
    _NEAREST_DIFFERENCE_METHODS = ("max_nearest_difference", "nearest_difference")

    def __init__(
        self,
        N: int = 100,
        aff_thresh: float = 0.1,
        max_discards: int = 1000,
        seed: Optional[int] = None,
        no_label_sample_selection: Literal[
            "max_average_difference", "max_nearest_difference"
        ] = "max_average_difference"
    ):
        super().__init__()

        # NOTE(review): the sanitizers are defined outside this file; they presumably return
        # the given value when the predicate accepts it and the default otherwise - confirm.
        self.N: int = sanitize_param(N, 100, lambda x: x > 0)
        self.aff_thresh: float = sanitize_param(aff_thresh, 0.1, lambda x: 0 < x < 1)
        self.max_discards: int = sanitize_param(max_discards, 1000, lambda x: x > 0)

        self.seed = sanitize_seed(seed)

        if self.seed is not None:
            np.random.seed(self.seed)

        self.no_label_sample_selection: str = sanitize_param(
            no_label_sample_selection,
            "max_average_difference",
            lambda x: x in self._NEAREST_DIFFERENCE_METHODS
        )

        # Set by ``fit``: array of known classes and detectors per class.
        self.classes: Optional[npt.NDArray] = None
        self.detectors: Optional[dict] = None

    def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True):
        """
        The function ``fit(...)``, performs the training according to ``X`` and ``y``, using
        the negative selection method (``NegativeSelect``).

        Parameters
        ----------
        * X (``npt.NDArray``):
            Training array, containing the samples and their characteristics,
            [``N samples`` (rows)][``N features`` (columns)].
        * y (``npt.NDArray``):
            Array of target classes of ``X`` with [``N samples`` (lines)].
        * verbose (``bool``): Feedback from detector generation to the user.

        Raises
        ----------
        * ``MaxDiscardsReachedError``: When ``max_discards`` consecutive candidate detectors
            are rejected for a class.

        Returns
        ----------
        * (``self``): Returns the instance itself.
        """
        progress = None
        super()._check_and_raise_exceptions_fit(X, y, "BNSA")

        # Converts the entire array X to boolean
        if X.dtype != bool:
            X = X.astype(bool)

        # Identifying the possible classes within the output array `y`.
        self.classes = np.unique(y)
        # Dictionary that will store detectors with classes as keys.
        list_detectors_by_class = {}
        # Separates the classes for training.
        sample_index: dict = self.__slice_index_list_by_class(y)
        # Progress bar for generating all detectors.
        if verbose:
            progress = tqdm(
                total=int(self.N * (len(self.classes))),
                bar_format="{desc} ┇{bar}┇ {n}/{total} detectors",
                postfix="\n",
            )

        for _class_ in self.classes:
            # Initializes the empty list that will contain the valid detectors.
            valid_detectors_set: list = []
            discard_count: int = 0
            # Updating the progress bar with the current class the algorithm is processing.
            if verbose:
                progress.set_description_str(
                    f"Generating the detectors for the {_class_} class:"
                )
            while len(valid_detectors_set) < self.N:
                # Generates a candidate detector vector randomly with values 0 and 1.
                vector_x = np.random.choice([False, True], size=X.shape[1])
                # Calculates the distance between the candidate and the class samples.
                distances = cdist(
                    np.expand_dims(vector_x, axis=0),
                    X[sample_index[_class_]],
                    metric="hamming",
                )

                # The candidate is rejected when any distance is below or equal to the
                # threshold: count the consecutive discard and try again.
                if np.any(distances <= self.aff_thresh):
                    discard_count += 1
                    if discard_count == self.max_discards:
                        raise MaxDiscardsReachedError(_class_)
                    continue

                # Accepted candidate: the consecutive discard counter starts over.
                discard_count = 0
                valid_detectors_set.append(vector_x)
                if verbose:
                    progress.update(1)

            # Add detectors to the dictionary with classes as keys.
            list_detectors_by_class[_class_] = valid_detectors_set

        # Notify the completion of detector generation for the classes.
        if verbose:
            progress.set_description(
                f"\033[92m✔ Non-self detectors for classes ({', '.join(map(str, self.classes))}) "
                f"successfully generated\033[0m"
            )
        # Saves the found detectors in the attribute for the class detectors.
        self.detectors = list_detectors_by_class
        return self

    def predict(self, X: npt.NDArray) -> Optional[npt.NDArray]:
        """
        Function to perform the prediction of classes based on detectors
        created after training.

        Parameters
        ----------
        * X (``npt.NDArray``): Array with input samples with [``N samples`` (Lines)] and
            [``N characteristics``(Columns)]

        Returns
        ----------
        * c (``npt.NDArray``): an ndarray of the form ``C`` [``N samples``],
            containing the predicted classes for ``X``
        * ``None``: If there are no detectors for the prediction.
        """
        # If there are no detectors, returns None.
        if self.detectors is None:
            return None

        super()._check_and_raise_exceptions_predict(
            X, len(self.detectors[self.classes[0]][0]), "BNSA"
        )

        # Converts the entire array X to boolean.
        if X.dtype != bool:
            X = X.astype(bool)

        # Initializes an empty array that will store the predictions.
        c = np.empty(shape=0)
        # For each sample row in X.
        for line in X:
            # List to store the possible classes to which the sample matches with self
            # when compared to the non-self detectors.
            possible_classes: list = []
            for _class_ in self.classes:
                # Calculates the Hamming distance between the row and all detectors.
                distances = cdist(
                    np.expand_dims(line, axis=0),
                    self.detectors[_class_],
                    metric="hamming",
                )

                # Each class is judged on its own: a match against the detectors of one
                # class must not prevent the remaining classes from being considered.
                if not np.any(distances <= self.aff_thresh):
                    # The sample passed through all detectors of the class: add the class
                    # as a possible prediction together with its average distance.
                    possible_classes.append([_class_, np.sum(distances) / self.N])

            # If belonging to one or more classes, adds the class with the greatest
            # average distance
            if possible_classes:
                c = np.append(c, [max(possible_classes, key=lambda x: x[1])[0]])
            # If there is only one class and the sample is not classified, sets the
            # output as non-self.
            elif len(self.classes) == 1:
                c = np.append(c, ["non-self"])
            # If the class cannot be identified by the detectors
            else:
                c = self.__assign_class_to_non_self_sample(line, c)

        return c

    def __assign_class_to_non_self_sample(self, line, c) -> npt.NDArray:
        """
        This function determines the class of a sample when all detectors classify it
        as "non-self". Classification is performed using the ``max_average_difference``
        and ``max_nearest_difference`` methods.

        Parameters
        ----------
        * line (list): Sample to be classified.
        * c (list): List of predictions to be updated with the new classification.

        Returns
        ----------
        * list: The list of predictions `c` updated with the class assigned to the sample.
        """
        use_nearest = self.no_label_sample_selection in self._NEAREST_DIFFERENCE_METHODS
        class_differences: dict = {}
        for _class_ in self.classes:
            # Hamming distance between the sample and every detector of the class.
            distances = cdist(
                np.expand_dims(line, axis=0),
                self.detectors[_class_],
                metric="hamming",
            )
            if use_nearest:
                # Score the class by the distance to its nearest detector.
                class_differences[_class_] = distances.min()
            else:
                # Score the class by the average distance to its detectors.
                class_differences[_class_] = distances.sum() / self.N

        # The class with the greatest score receives the sample.
        return np.append(c, [max(class_differences, key=class_differences.get)])

    def __slice_index_list_by_class(self, y: npt.NDArray) -> dict:
        """
        The function ``__slice_index_list_by_class(...)``, separates the indices of the lines
        according to the output class, to loop through the sample array, only in positions
        where the output is the class being trained.

        Parameters
        ----------
        * y (``npt.NDArray``):
            Receives a ``y``[``N sample``] array with the output classes of the ``X``
            sample array.

        Returns
        ----------
        * dict: A dictionary with the list of array positions(``y``), with the classes as key.
        """
        return slice_index_list_by_class(self.classes, y)

    def get_params(self, deep: bool = True) -> dict:  # pylint: disable=W0613
        """
        The get_params function returns a dictionary with the object's main parameters.

        Every key is a keyword argument accepted by ``__init__``.
        """
        return {
            "N": self.N,
            "aff_thresh": self.aff_thresh,
            "max_discards": self.max_discards,
            "seed": self.seed,
            "no_label_sample_selection": self.no_label_sample_selection,
        }