aisp 0.1.34__py3-none-any.whl → 0.1.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,682 @@
1
+ """Negative Selection Algorithm."""
2
+
3
+ from typing import Dict, Literal, Optional, Union
4
+ from tqdm import tqdm
5
+
6
+ import numpy as np
7
+ import numpy.typing as npt
8
+
9
+ from ._ns_core import (
10
+ check_detector_bnsa_validity,
11
+ bnsa_class_prediction,
12
+ check_detector_rnsa_validity,
13
+ )
14
+ from ..exceptions import MaxDiscardsReachedError
15
+ from ..utils import slice_index_list_by_class
16
+ from ..utils.distance import (
17
+ min_distance_to_class_vectors,
18
+ get_metric_code,
19
+ compute_metric_distance,
20
+ )
21
+ from ..utils.sanitizers import sanitize_seed, sanitize_choice, sanitize_param
22
+ from ._base import BaseNSA, Detector
23
+
24
+
25
class RNSA(BaseNSA):
    """
    Real-Valued Negative Selection Algorithm (``RNSA``) for classification and anomaly
    identification through the self / non-self method.

    Parameters
    ----------
    * N (``int``): Number of detectors. Defaults to ``100``.
    * r (``float``): Radius of the detector. Defaults to ``0.05``.
    * r_s (``float``): rₛ Radius of the ``X`` own samples. Defaults to ``0.0001``.
    * k (``int``): Number of neighbors near the randomly generated detectors to perform the
        distance average calculation. Defaults to ``1``.
    * metric (``str``): Way to calculate the distance between the detector and the sample:
        + ``'euclidean'`` ➜ The calculation of the distance is given by the expression:
            √( (x₁ – y₁)² + (x₂ – y₂)² + ... + (xₙ – yₙ)² ).
        + ``'minkowski'`` ➜ The calculation of the distance is given by the expression:
            ( |x₁ – y₁|ᵖ + |x₂ – y₂|ᵖ + ... + |xₙ – yₙ|ᵖ )^(1/p).
        + ``'manhattan'`` ➜ The calculation of the distance is given by the expression:
            |x₁ – y₁| + |x₂ – y₂| + ... + |xₙ – yₙ|.

        Defaults to ``'euclidean'``.
    * max_discards (``int``): Maximum number of consecutive detector discards, aimed at
        preventing a possible infinite loop in case a radius is defined that cannot
        generate non-self detectors. Defaults to ``1000``.
    * seed (``int``): Seed for the random generation of values in the detectors. Defaults to
        ``None``.
    * algorithm (``str``): Set the algorithm version:
        * ``'default-NSA'``: Default algorithm with fixed radius.
        * ``'V-detector'``: This algorithm is based on the article
            [Real-Valued Negative Selection Algorithm with Variable-Sized Detectors][2],
            by Ji, Z., Dasgupta, D. (2004), and uses a variable radius for anomaly
            detection in feature spaces.

        Defaults to ``'default-NSA'``.

    * ``**kwargs``:
        - non_self_label (``str``): Label assigned when the data has only one output
            class and the sample is classified as not belonging to that class.
            Defaults to ``'non-self'``.
        - cell_bounds (``bool``): If set to ``True``, limits the generation of detectors
            to the space within the plane between 0 and 1, i.e. any detector whose radius
            exceeds this limit is discarded. Only used by the ``V-detector`` algorithm.
            Defaults to ``False``.
        - p (``float``): Value of ``p`` used in the Minkowski distance. The default is
            ``2``, which corresponds to the Euclidean distance. Different values of p
            lead to different variants of the [Minkowski Distance][1].

    Notes
    ----------
    [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.minkowski_distance.html

    [2] https://doi.org/10.1007/978-3-540-24854-5_30

    """

    def __init__(
        self,
        N: int = 100,
        r: float = 0.05,
        r_s: float = 0.0001,
        k: int = 1,
        metric: Literal["manhattan", "minkowski", "euclidean"] = "euclidean",
        max_discards: int = 1000,
        seed: Optional[int] = None,
        algorithm: Literal["default-NSA", "V-detector"] = "default-NSA",
        **kwargs: Dict[str, Union[bool, str, float]],
    ):
        # Every argument goes through a sanitizer that is given a fallback value
        # (the documented default) alongside the user-supplied one.
        self.metric = sanitize_choice(metric, ["manhattan", "minkowski"], "euclidean")
        self.seed = sanitize_seed(seed)
        if self.seed is not None:
            np.random.seed(seed)
        self.k: int = sanitize_param(k, 1, lambda x: x > 1)
        self.N: int = sanitize_param(N, 100, lambda x: x >= 1)
        self.r: float = sanitize_param(r, 0.05, lambda x: x > 0)
        self.r_s: float = sanitize_param(r_s, 0.0001, lambda x: x > 0)
        self.algorithm: str = sanitize_param(
            algorithm, "default-NSA", lambda x: x == "V-detector"
        )
        self.max_discards: int = sanitize_param(max_discards, 1000, lambda x: x > 0)

        # Optional settings retrieved from kwargs.
        self.p: float = kwargs.get("p", 2)
        self.cell_bounds: bool = kwargs.get("cell_bounds", False)
        self.non_self_label: str = kwargs.get("non_self_label", "non-self")

        # Populated by ``fit``.
        self._detectors: Union[dict, None] = None
        self.classes: npt.NDArray = None

    def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True):
        """
        Train the model on ``X`` and ``y`` by generating, for every class, ``N`` random
        detectors that do not match the samples of that class (negative selection).

        Parameters
        ----------
        * X (``npt.NDArray``): Training array, containing the samples and their
            characteristics, [``N samples`` (rows)][``N features`` (columns)].
        * y (``npt.NDArray``): Array of target classes of ``X`` with [``N samples`` (lines)].
        * verbose (``bool``): Feedback from detector generation to the user.

        Raises
        ----------
        * TypeError: If X or y are not ndarrays or have incompatible shapes.
        * MaxDiscardsReachedError: The maximum number of detector discards was reached during
            maturation. Check the defined radius value and consider reducing it.

        Returns
        ----------
        * (``self``): Returns the instance itself.
        """
        progress = None
        super()._check_and_raise_exceptions_fit(X, y)

        # Identifying the possible classes within the output array `y`.
        self.classes = np.unique(y)
        # Dictionary that will store detectors with classes as keys.
        list_detectors_by_class = {}
        # Separates the classes for training.
        sample_index = self.__slice_index_list_by_class(y)
        # Progress bar for generating all detectors.
        if verbose:
            progress = tqdm(
                total=int(self.N * (len(self.classes))),
                bar_format="{desc} ┇{bar}┇ {n}/{total} detectors",
                postfix="\n",
            )
        try:
            for _class_ in self.classes:
                # Valid detectors found so far for the current class.
                valid_detectors_set = []
                discard_count = 0
                x_class = X[sample_index[_class_]]
                # Show which class is currently being processed.
                if verbose:
                    progress.set_description_str(
                        f"Generating the detectors for the {_class_} class:"
                    )
                while len(valid_detectors_set) < self.N:
                    # Random candidate detector with values between 0 and 1.
                    vector_x = np.random.random_sample(size=X.shape[1])
                    # ``False`` when rejected; ``True`` or ``(True, radius)`` when accepted.
                    valid_detector = self.__checks_valid_detector(x_class, vector_x)

                    if valid_detector is not False:
                        discard_count = 0
                        radius = (
                            valid_detector[1]
                            if self.algorithm == "V-detector"
                            else None
                        )
                        valid_detectors_set.append(Detector(vector_x, radius))
                        if verbose:
                            progress.update(1)
                    else:
                        # Only *consecutive* discards count towards the limit.
                        discard_count += 1
                        if discard_count == self.max_discards:
                            raise MaxDiscardsReachedError(_class_)

                # Add detectors, with classes as keys in the dictionary.
                list_detectors_by_class[_class_] = np.array(valid_detectors_set)
            # Notify completion of detector generation for the classes.
            if verbose:
                progress.set_description(
                    f"\033[92m✔ Non-self detectors for classes ({', '.join(map(str, self.classes))}) "
                    f"successfully generated\033[0m"
                )
        finally:
            # Release the progress bar even when detector generation is aborted.
            if progress is not None:
                progress.close()
        # Saves the found detectors in the attribute for the non-self detectors of the
        # trained model.
        self._detectors = list_detectors_by_class
        return self

    def predict(self, X: npt.NDArray) -> Optional[npt.NDArray]:
        """
        Predict the classes of ``X`` based on the detectors created during training.

        Parameters
        ----------
        * X (``npt.NDArray``)
            Array with input samples with [``N samples`` (Lines)] and
            [``N characteristics``(Columns)]

        Raises
        ----------
        * TypeError
            If X is not an ndarray or list.
        * FeatureDimensionMismatch
            If the number of features in X does not match the expected number.

        Returns
        ----------
        * C (``npt.NDArray``)
            an ndarray of the form ``C`` [``N samples``], containing the predicted classes
            for ``X``.
        * ``None``
            If there are no detectors for the prediction.
        """
        # Without detectors (model not fitted) there is nothing to predict with.
        if self._detectors is None:
            return None

        super()._check_and_raise_exceptions_predict(
            X, len(self._detectors[self.classes[0]][0].position)
        )

        c = []
        for line in X:
            _class_ = self.__compare_sample_to_detectors(line)
            if _class_ is not None:
                c.append(_class_)
            elif len(self.classes) == 1:
                # Single-class model: an unmatched sample is labelled as non-self.
                c.append(self.non_self_label)
            else:
                # No class accepted the sample: pick the class whose detectors are,
                # on average, the farthest from it.
                average_distance: dict = {}
                for label in self.classes:
                    average_distance[label] = np.average(
                        [
                            self.__distance(detector.position, line)
                            for detector in self._detectors[label]
                        ]
                    )
                c.append(max(average_distance, key=average_distance.get))
        return np.array(c)

    def __slice_index_list_by_class(self, y: npt.NDArray) -> dict:
        """
        Separate the row indices of ``y`` by output class, so that training can iterate
        only over the samples of the class being processed.

        Parameters
        ----------
        * y (npt.NDArray)
            Receives a ``y``[``N sample``] array with the output classes of the
            ``X`` sample array.

        Returns
        ----------
        * dict: A dictionary with the list of array positions(``y``), with the classes as key.
        """
        return slice_index_list_by_class(self.classes, y)

    def __checks_valid_detector(
        self, x_class: npt.NDArray = None, vector_x: npt.NDArray = None
    ) -> Union[bool, tuple[bool, float]]:
        """
        Check whether a candidate detector is valid (non-self) for the given class samples.

        Parameters
        ----------
        * x_class (``npt.NDArray``)
            Array ``x_class`` with the samples per class.
        * vector_x (``npt.NDArray``)
            Randomly generated vector x candidate detector with values between[0, 1].

        Returns
        ----------
        * ``False`` if the detector is not valid.
        * ``True`` if the detector is valid (``default-NSA``).
        * ``(True, radius)`` if the detector is valid (``V-detector``).
        """
        # NOTE: callers test the result with ``is not False``, so rejections must be the
        # Python ``False`` singleton (never a ``numpy.bool_``).
        if np.size(x_class) == 0 or np.size(vector_x) == 0:
            return False

        if self.k > 1:
            # Keep the k smallest distances between the candidate and the class samples.
            knn_list = np.empty(shape=0)
            for x in x_class:
                knn_list = self.__compare_knearest_neighbors_list(
                    knn_list, self.__distance(x, vector_x)
                )
            distance_mean = np.mean(knn_list)
            if self.algorithm == "V-detector":
                return self.__detector_is_valid_to_vdetector(distance_mean, vector_x)
            # Valid when the mean kNN distance exceeds detector radius + sample radius.
            return bool(distance_mean > (self.r + self.r_s))

        if self.algorithm == "V-detector":
            distance = min_distance_to_class_vectors(
                x_class, vector_x, get_metric_code(self.metric), self.p
            )
            return self.__detector_is_valid_to_vdetector(distance, vector_x)

        # Fixed-radius check against every sample of the class.
        threshold: float = self.r + self.r_s
        return bool(
            check_detector_rnsa_validity(
                x_class, vector_x, threshold, get_metric_code(self.metric), self.p
            )
        )

    def __compare_knearest_neighbors_list(
        self, knn: npt.NDArray, distance: float
    ) -> npt.NDArray:
        """
        Update the sorted list of the ``k`` smallest distances with a new distance.

        While the list holds fewer than ``k`` entries the distance is always added;
        afterwards it replaces the entry at position ``k-1`` (the largest) only if it is
        smaller. The list is kept sorted in ascending order.

        Parameters
        ----------
        * knn (``npt.NDArray``)
            List of k-nearest neighbor distances.
        * distance (``float``)
            Distance to check.

        Returns
        ----------
        * ``npt.NDArray``: Updated and sorted nearest neighbor list.
        """
        if len(knn) < self.k:
            knn = np.append(knn, distance)
            knn.sort()
            return knn

        if knn[self.k - 1] > distance:
            knn[self.k - 1] = distance
            knn.sort()

        return knn

    def __compare_sample_to_detectors(self, line: npt.NDArray):
        """
        Compare a sample with the detectors of every class to find the class it belongs to.

        Parameters
        ----------
        * line (``npt.NDArray``): vector with N-features

        Returns
        ----------
        * Returns the predicted class with the detectors or None if the sample does not qualify
        for any class.
        """
        # Pairs of [class, average distance between its detectors and the sample].
        possible_classes = []
        for _class_ in self.classes:
            class_found: bool = True
            sum_distance = 0
            for detector in self._detectors[_class_]:
                distance = self.__distance(detector.position, line)
                sum_distance += distance
                # V-detector uses the radius stored in each detector; otherwise ``r``.
                limit = detector.radius if self.algorithm == "V-detector" else self.r
                if distance <= limit:
                    # The sample falls inside a non-self detector of this class.
                    class_found = False
                    break

            # The sample passed through all the detectors of the class.
            if class_found:
                possible_classes.append([_class_, sum_distance / self.N])

        if not possible_classes:
            return None
        if len(possible_classes) == 1:
            return possible_classes[0][0]
        # More than one candidate: choose the class with the greatest average distance.
        return max(possible_classes, key=lambda x: x[1])[0]

    def __distance(self, u: npt.NDArray, v: npt.NDArray) -> float:
        """
        Calculate the distance between two points using the chosen ``metric``.

        Parameters
        ----------
        * u (``npt.NDArray``): Coordinates of the first point.
        * v (``npt.NDArray``): Coordinates of the second point.

        Returns
        ----------
        * Distance (``float``): between the two points.
        """
        return compute_metric_distance(u, v, get_metric_code(self.metric), self.p)

    def __detector_is_valid_to_vdetector(
        self, distance: float, vector_x: npt.NDArray
    ) -> Union[bool, tuple[bool, float]]:
        """
        Check if the distance between the detector and the samples, minus the radius of the
        samples, is greater than the minimum radius ``r``.

        Parameters
        ----------
        * distance (``float``): minimum distance calculated between all samples.
        * vector_x (``numpy.ndarray``): randomly generated candidate detector vector x with
        values between 0 and 1.

        Returns
        ----------
        * ``False`` if the calculated radius is not greater than the minimum radius or exceeds
        the edge of the space, if this option is enabled.
        * ``True`` and the distance minus the radius of the samples, if the radius is valid.
        """
        new_detector_r = float(distance - self.r_s)
        if self.r >= new_detector_r:
            return False

        # With cell_bounds enabled the whole detector must lie inside [0, 1] on every axis.
        if self.cell_bounds:
            for coordinate in vector_x:
                if (coordinate - new_detector_r) < 0 or (coordinate + new_detector_r) > 1:
                    return False

        return (True, new_detector_r)
449
+
450
+
451
class BNSA(BaseNSA):
    """
    The ``BNSA`` (Binary Negative Selection Algorithm) class is for classification and
    identification purposes of anomalies through the self and not self method.

    Parameters
    ----------
    * N (``int``): Number of detectors. Defaults to ``100``.
    * aff_thresh (``float``): The variable represents the percentage of similarity
        between the T cell and the own samples. The default value is 10% (0.1), while a value of
        1.0 represents 100% similarity.
    * max_discards (``int``): Maximum number of consecutive detector discards, which aims to
        avoid a possible infinite loop if a threshold is defined for which it is not possible
        to generate non-self detectors. Defaults to ``1000``.
    * seed (``int``): Seed for the random generation of values in the detectors. Defaults to
        ``None``.
    * no_label_sample_selection (``str``): Method for selecting labels for samples designated as
        non-self by all detectors. Available method types:
        - (``max_average_difference``): Selects the class with the highest average difference
            among the detectors.
        - (``max_nearest_difference``): Selects the class with the highest difference to
            its nearest detector. The legacy spelling ``nearest_difference`` is also
            accepted.
    """

    # Accepted spellings for the "nearest detector" fallback strategy. The short form is
    # kept for backward compatibility with earlier releases.
    _NEAREST_DIFFERENCE_MODES = ("max_nearest_difference", "nearest_difference")

    def __init__(
        self,
        N: int = 100,
        aff_thresh: float = 0.1,
        max_discards: int = 1000,
        seed: Optional[int] = None,
        no_label_sample_selection: Literal[
            "max_average_difference", "max_nearest_difference"
        ] = "max_average_difference",
    ):
        super().__init__()

        self.N: int = sanitize_param(N, 100, lambda x: x > 0)
        self.aff_thresh: float = sanitize_param(aff_thresh, 0.1, lambda x: 0 < x < 1)
        self.max_discards: int = sanitize_param(max_discards, 1000, lambda x: x > 0)

        self.seed = sanitize_seed(seed)

        if self.seed is not None:
            np.random.seed(seed)

        # Anything other than a "nearest difference" spelling falls back to the default
        # average-based strategy.
        self.no_label_sample_selection: str = sanitize_param(
            no_label_sample_selection,
            "max_average_difference",
            lambda x: x in self._NEAREST_DIFFERENCE_MODES,
        )

        # Populated by ``fit``.
        self.classes: npt.NDArray = None
        self._detectors: Optional[dict] = None
        self._detectors_stack: npt.NDArray = None

    def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True):
        """
        Train the model on ``X`` and ``y`` by generating, for every class, ``N`` random
        binary detectors that do not match the samples of that class (negative selection).

        Parameters
        ----------
        * X (``npt.NDArray``):
            Training array, containing the samples and their characteristics,
            [``N samples`` (rows)][``N features`` (columns)].
        * y (``npt.NDArray``):
            Array of target classes of ``X`` with [``N samples`` (lines)].
        * verbose (``bool``): Feedback from detector generation to the user.

        Raises
        ----------
        * MaxDiscardsReachedError: The maximum number of consecutive detector discards was
            reached while generating the detectors of a class.

        Returns
        ----------
        * (``self``): Returns the instance itself.
        """
        progress = None
        super()._check_and_raise_exceptions_fit(X, y, "BNSA")

        # Converts the entire array X to boolean
        X = X.astype(np.bool_)

        # Identifying the possible classes within the output array `y`.
        self.classes = np.unique(y)
        # Dictionary that will store detectors with classes as keys.
        list_detectors_by_class = {}
        # Separates the classes for training.
        sample_index: dict = self.__slice_index_list_by_class(y)
        # Progress bar for generating all detectors.
        if verbose:
            progress = tqdm(
                total=int(self.N * (len(self.classes))),
                bar_format="{desc} ┇{bar}┇ {n}/{total} detectors",
                postfix="\n",
            )

        try:
            for _class_ in self.classes:
                # Valid detectors found so far for the current class.
                valid_detectors_set: list = []
                discard_count: int = 0
                # Show which class is currently being processed.
                if verbose:
                    progress.set_description_str(
                        f"Generating the detectors for the {_class_} class:"
                    )
                x_class = X[sample_index[_class_]]
                while len(valid_detectors_set) < self.N:
                    # Random candidate detector with values 0 and 1.
                    vector_x = np.random.randint(0, 2, size=X.shape[1]).astype(np.bool_)
                    if check_detector_bnsa_validity(x_class, vector_x, self.aff_thresh):
                        discard_count = 0
                        valid_detectors_set.append(vector_x)
                        if verbose:
                            progress.update(1)
                    else:
                        # Only *consecutive* discards count towards the limit.
                        discard_count += 1
                        if discard_count == self.max_discards:
                            raise MaxDiscardsReachedError(_class_)

                # Add detectors to the dictionary with classes as keys.
                list_detectors_by_class[_class_] = np.array(valid_detectors_set)

            # Notify the completion of detector generation for the classes.
            if verbose:
                progress.set_description(
                    f"\033[92m✔ Non-self detectors for classes ({', '.join(map(str, self.classes))}) "
                    f"successfully generated\033[0m"
                )
        finally:
            # Release the progress bar even when detector generation is aborted.
            if progress is not None:
                progress.close()
        # Saves the found detectors in the attribute for the class detectors.
        self._detectors = list_detectors_by_class
        self._detectors_stack = np.array(
            [np.stack(self._detectors[class_name]) for class_name in self.classes]
        )
        return self

    def predict(self, X: npt.NDArray) -> Optional[npt.NDArray]:
        """
        Predict the classes of ``X`` based on the detectors created during training.

        Parameters
        ----------
        * X (``npt.NDArray``): Array with input samples with [``N samples`` (Lines)] and
            [``N characteristics``(Columns)]

        Returns
        ----------
        * c (``npt.NDArray``): an ndarray of the form ``C`` [``N samples``],
            containing the predicted classes for ``X``
        * ``None``: If there are no detectors for the prediction.
        """
        # Without detectors (model not fitted) there is nothing to predict with.
        if self._detectors is None:
            return None

        super()._check_and_raise_exceptions_predict(
            X, len(self._detectors[self.classes[0]][0]), "BNSA"
        )

        # Converts the entire array X to boolean.
        if X.dtype != bool:
            X = X.astype(bool)

        c = []
        for line in X:
            # Index into ``self.classes``; a negative value means no class matched.
            class_index = bnsa_class_prediction(
                line, self._detectors_stack, self.aff_thresh
            )
            if class_index > -1:
                c.append(self.classes[class_index])
            elif len(self.classes) == 1:
                # Single-class model: an unmatched sample is labelled as non-self.
                c.append("non-self")
            else:
                # No class matched: fall back to the configured selection strategy.
                self.__assign_class_to_non_self_sample(line, c)

        return np.array(c)

    def __assign_class_to_non_self_sample(self, line: npt.NDArray, c: list):
        """
        Determine the class of a sample when all detectors classify it as "non-self" and
        append that class to ``c``.

        For every class, the fraction of differing bits between the sample and each of the
        class detectors is computed. The class is then chosen with the strategy stored in
        ``no_label_sample_selection``: the greatest distance to the nearest detector
        (``max_nearest_difference``) or the greatest average distance to the detectors
        (``max_average_difference``).

        Parameters
        ----------
        * line (``npt.NDArray``): Sample to be classified.
        * c (list): List of predictions, updated in place with the new classification.

        Returns
        ----------
        * ``None``: the result is appended to ``c``.
        """
        use_nearest = self.no_label_sample_selection in self._NEAREST_DIFFERENCE_MODES
        class_differences: dict = {}
        for _class_ in self.classes:
            detectors = self._detectors[_class_]
            # One value per detector (axis=1); without the axis all detectors would be
            # collapsed into a single scalar and "nearest" could not be determined.
            distances = np.sum(line != detectors, axis=1) / detectors.shape[1]
            if use_nearest:
                class_differences[_class_] = distances.min()
            else:
                class_differences[_class_] = distances.mean()

        c.append(max(class_differences, key=class_differences.get))

    def __slice_index_list_by_class(self, y: npt.NDArray) -> dict:
        """
        Separate the row indices of ``y`` by output class, so that training can iterate
        only over the samples of the class being processed.

        Parameters
        ----------
        * y (``npt.NDArray``):
            Receives a ``y``[``N sample``] array with the output classes of the ``X``
            sample array.

        Returns
        ----------
        * dict: A dictionary with the list of array positions(``y``), with the classes as key.
        """
        return slice_index_list_by_class(self.classes, y)