aisp 0.3.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aisp-0.3.0 → aisp-0.3.1}/PKG-INFO +1 -1
- aisp-0.3.1/aisp/__init__.py +26 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/base/_classifier.py +2 -1
- {aisp-0.3.0 → aisp-0.3.1}/aisp/base/_clusterer.py +2 -1
- {aisp-0.3.0 → aisp-0.3.1}/aisp/csa/_ai_recognition_sys.py +10 -9
- {aisp-0.3.0 → aisp-0.3.1}/aisp/csa/_base.py +0 -1
- {aisp-0.3.0 → aisp-0.3.1}/aisp/ina/_ai_network.py +7 -8
- {aisp-0.3.0 → aisp-0.3.1}/aisp/nsa/__init__.py +2 -1
- aisp-0.3.1/aisp/nsa/_binary_negative_selection.py +239 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/nsa/_negative_selection.py +4 -231
- {aisp-0.3.0 → aisp-0.3.1}/aisp.egg-info/PKG-INFO +1 -1
- {aisp-0.3.0 → aisp-0.3.1}/aisp.egg-info/SOURCES.txt +1 -0
- {aisp-0.3.0 → aisp-0.3.1}/pyproject.toml +1 -1
- aisp-0.3.0/aisp/__init__.py +0 -4
- {aisp-0.3.0 → aisp-0.3.1}/LICENSE +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/README.md +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/base/__init__.py +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/base/_base.py +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/base/mutation.py +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/csa/__init__.py +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/csa/_cell.py +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/exceptions.py +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/ina/__init__.py +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/ina/_base.py +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/nsa/_base.py +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/nsa/_ns_core.py +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/utils/__init__.py +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/utils/_multiclass.py +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/utils/distance.py +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/utils/metrics.py +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/utils/sanitizers.py +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/utils/types.py +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp/utils/validation.py +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp.egg-info/dependency_links.txt +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp.egg-info/requires.txt +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/aisp.egg-info/top_level.txt +0 -0
- {aisp-0.3.0 → aisp-0.3.1}/setup.cfg +0 -0
@@ -0,0 +1,26 @@
|
|
1
|
+
"""AISP - Artificial Immune Systems Package.
|
2
|
+
|
3
|
+
AISP is a Python package of immunoinspired techniques that apply metaphors from the vertebrate
|
4
|
+
immune system to pattern recognition and optimization tasks.
|
5
|
+
|
6
|
+
The package is organized into specialized modules, each dedicated to a family of Artificial
|
7
|
+
Immune Systems algorithms:
|
8
|
+
- csa: Clonal Selection Algorithms
|
9
|
+
- nsa: Negative Selection Algorithms
|
10
|
+
- ina: Immune Network Algorithms
|
11
|
+
|
12
|
+
For detailed documentation and examples, visit:
|
13
|
+
https://ais-package.github.io/docs/intro
|
14
|
+
"""
|
15
|
+
|
16
|
+
from . import csa
|
17
|
+
from . import nsa
|
18
|
+
from . import ina
|
19
|
+
|
20
|
+
__author__ = "AISP Development Team"
|
21
|
+
__version__ = "0.3.1"
|
22
|
+
__all__ = [
|
23
|
+
'csa',
|
24
|
+
'nsa',
|
25
|
+
'ina'
|
26
|
+
]
|
@@ -1,5 +1,6 @@
|
|
1
1
|
"""Base class for classification algorithm."""
|
2
2
|
|
3
|
+
from __future__ import annotations
|
3
4
|
from abc import ABC, abstractmethod
|
4
5
|
from typing import Optional, Union
|
5
6
|
|
@@ -19,7 +20,7 @@ class BaseClassifier(ABC, Base):
|
|
19
20
|
classes: Union[npt.NDArray, list] = []
|
20
21
|
|
21
22
|
@abstractmethod
|
22
|
-
def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True) ->
|
23
|
+
def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True) -> BaseClassifier:
|
23
24
|
"""
|
24
25
|
Train the model using the input data X and corresponding labels y.
|
25
26
|
|
@@ -1,5 +1,6 @@
|
|
1
1
|
"""Base class for clustering algorithms."""
|
2
2
|
|
3
|
+
from __future__ import annotations
|
3
4
|
from abc import ABC, abstractmethod
|
4
5
|
from typing import Optional
|
5
6
|
|
@@ -17,7 +18,7 @@ class BaseClusterer(ABC, Base):
|
|
17
18
|
"""
|
18
19
|
|
19
20
|
@abstractmethod
|
20
|
-
def fit(self, X: npt.NDArray, verbose: bool = True) ->
|
21
|
+
def fit(self, X: npt.NDArray, verbose: bool = True) -> BaseClusterer:
|
21
22
|
"""
|
22
23
|
Train the model using the input data X.
|
23
24
|
|
@@ -1,5 +1,6 @@
|
|
1
1
|
"""Artificial Immune Recognition System (AIRS)."""
|
2
2
|
|
3
|
+
from __future__ import annotations
|
3
4
|
import random
|
4
5
|
from collections import Counter
|
5
6
|
from heapq import nlargest
|
@@ -189,6 +190,7 @@ class AIRS(BaseAIRS):
|
|
189
190
|
self.p: np.float64 = np.float64(kwargs.get("p", 2.0))
|
190
191
|
|
191
192
|
self._cells_memory = None
|
193
|
+
self._all_class_cell_vectors = None
|
192
194
|
self.affinity_threshold = 0.0
|
193
195
|
self.classes = []
|
194
196
|
self._bounds: Optional[npt.NDArray[np.float64]] = None
|
@@ -198,7 +200,7 @@ class AIRS(BaseAIRS):
|
|
198
200
|
"""Returns the trained cells memory, organized by class."""
|
199
201
|
return self._cells_memory
|
200
202
|
|
201
|
-
def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True) ->
|
203
|
+
def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True) -> AIRS:
|
202
204
|
"""
|
203
205
|
Fit the model to the training data using the AIRS.
|
204
206
|
|
@@ -301,6 +303,11 @@ class AIRS(BaseAIRS):
|
|
301
303
|
)
|
302
304
|
progress.close()
|
303
305
|
self._cells_memory = pool_cells_classes
|
306
|
+
self._all_class_cell_vectors = [
|
307
|
+
(class_name, cell.vector)
|
308
|
+
for class_name in self.classes
|
309
|
+
for cell in self._cells_memory[class_name]
|
310
|
+
]
|
304
311
|
return self
|
305
312
|
|
306
313
|
def predict(self, X: npt.NDArray) -> Optional[npt.NDArray]:
|
@@ -322,7 +329,7 @@ class AIRS(BaseAIRS):
|
|
322
329
|
An ndarray of the form ``C`` [``N samples``], containing the predicted classes for
|
323
330
|
``X``. or ``None``: If there are no detectors for the prediction.
|
324
331
|
"""
|
325
|
-
if self.
|
332
|
+
if self._all_class_cell_vectors is None:
|
326
333
|
return None
|
327
334
|
|
328
335
|
super()._check_and_raise_exceptions_predict(
|
@@ -331,16 +338,10 @@ class AIRS(BaseAIRS):
|
|
331
338
|
|
332
339
|
c: list = []
|
333
340
|
|
334
|
-
all_cells_memory = [
|
335
|
-
(class_name, cell.vector)
|
336
|
-
for class_name in self.classes
|
337
|
-
for cell in self._cells_memory[class_name]
|
338
|
-
]
|
339
|
-
|
340
341
|
for line in X:
|
341
342
|
label_stim_list = [
|
342
343
|
(class_name, self._affinity(memory, line))
|
343
|
-
for class_name, memory in
|
344
|
+
for class_name, memory in self._all_class_cell_vectors
|
344
345
|
]
|
345
346
|
# Create the list with the k nearest neighbors and select the class with the most votes
|
346
347
|
k_nearest = nlargest(self.k, label_stim_list, key=lambda x: x[1])
|
@@ -1,5 +1,6 @@
|
|
1
1
|
"""Artificial Immune Network (AiNet)."""
|
2
2
|
|
3
|
+
from __future__ import annotations
|
3
4
|
from collections import Counter
|
4
5
|
from heapq import nlargest
|
5
6
|
from typing import Optional
|
@@ -21,7 +22,7 @@ from ..utils.validation import detect_vector_data_type
|
|
21
22
|
|
22
23
|
|
23
24
|
class AiNet(BaseAiNet):
|
24
|
-
"""Artificial Immune Network for Compression and Clustering
|
25
|
+
"""Artificial Immune Network for Compression and Clustering.
|
25
26
|
|
26
27
|
This class implements the aiNet algorithm, an artificial immune network model designed for
|
27
28
|
clustering and data compression tasks. The aiNet algorithm uses principles from immune
|
@@ -77,13 +78,11 @@ class AiNet(BaseAiNet):
|
|
77
78
|
|
78
79
|
References
|
79
80
|
----------
|
80
|
-
.. [1]
|
81
|
-
|
82
|
-
Draft Chapter XII of the book *Data Mining: A Heuristic Approach*.
|
83
|
-
Department of Computer and Automation Engineering, University of Campinas.
|
81
|
+
.. [1] De Castro, Leandro & José, Fernando & von Zuben, Antonio Augusto. (2001). aiNet: An
|
82
|
+
Artificial Immune Network for Data Analysis.
|
84
83
|
Available at:
|
85
|
-
https://www.
|
86
|
-
|
84
|
+
https://www.researchgate.net/publication/
|
85
|
+
228378350_aiNet_An_Artificial_Immune_Network_for_Data_Analysis
|
87
86
|
.. [2] SciPy Documentation. *Minimum Spanning Tree*.
|
88
87
|
https://docs.scipy.org/doc/scipy/reference/generated/
|
89
88
|
scipy.sparse.csgraph.minimum_spanning_tree
|
@@ -175,7 +174,7 @@ class AiNet(BaseAiNet):
|
|
175
174
|
'std_distance': self._mst_std_distance
|
176
175
|
}
|
177
176
|
|
178
|
-
def fit(self, X: npt.NDArray, verbose: bool = True):
|
177
|
+
def fit(self, X: npt.NDArray, verbose: bool = True) -> AiNet:
|
179
178
|
"""
|
180
179
|
Train the AiNet model on input data.
|
181
180
|
|
@@ -5,7 +5,8 @@ distinguish between self and non-self. Only T-cells capable of recognizing non-s
|
|
5
5
|
preserved.
|
6
6
|
"""
|
7
7
|
|
8
|
-
from .
|
8
|
+
from ._binary_negative_selection import BNSA
|
9
|
+
from ._negative_selection import RNSA
|
9
10
|
|
10
11
|
__author__ = "João Paulo da Silva Barros"
|
11
12
|
__all__ = ["RNSA", "BNSA"]
|
@@ -0,0 +1,239 @@
|
|
1
|
+
"""Negative Selection Algorithm."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
from typing import Dict, Literal, Optional, Union
|
5
|
+
from tqdm import tqdm
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
import numpy.typing as npt
|
9
|
+
|
10
|
+
from ._ns_core import (
|
11
|
+
check_detector_bnsa_validity,
|
12
|
+
bnsa_class_prediction
|
13
|
+
)
|
14
|
+
from ..exceptions import MaxDiscardsReachedError
|
15
|
+
from ..utils.sanitizers import sanitize_seed, sanitize_param
|
16
|
+
from ._base import BaseNSA
|
17
|
+
|
18
|
+
|
19
|
+
class BNSA(BaseNSA):
|
20
|
+
"""BNSA (Binary Negative Selection Algorithm).
|
21
|
+
|
22
|
+
Class is for classification and identification purposes of anomalies through the self and not
|
23
|
+
self method.
|
24
|
+
|
25
|
+
Parameters
|
26
|
+
----------
|
27
|
+
N : int, default=100
|
28
|
+
Number of detectors.
|
29
|
+
aff_thresh : float, default=0.1
|
30
|
+
The variable represents the percentage of similarity between the T cell and the own
|
31
|
+
samples. The default value is 10% (0.1), while a value of 1.0 represents 100% similarity.
|
32
|
+
max_discards : int, default=1000
|
33
|
+
This parameter indicates the maximum number of detector discards in sequence, which aims
|
34
|
+
to avoid a possible infinite loop if a radius is defined that it is not possible to
|
35
|
+
generate non-self detectors.
|
36
|
+
seed : Optional[int], default=None
|
37
|
+
Seed for the random generation of values in the detectors.
|
38
|
+
no_label_sample_selection : str, default="max_average_difference"
|
39
|
+
Method for selecting labels for samples designated as non-self by all detectors.
|
40
|
+
Available method types:
|
41
|
+
|
42
|
+
- max_average_difference - Selects the class with the highest average difference among the
|
43
|
+
detectors.
|
44
|
+
|
45
|
+
- max_nearest_difference - Selects the class with the highest difference between the
|
46
|
+
nearest and farthest detector from the sample.
|
47
|
+
"""
|
48
|
+
|
49
|
+
def __init__(
|
50
|
+
self,
|
51
|
+
N: int = 100,
|
52
|
+
aff_thresh: float = 0.1,
|
53
|
+
max_discards: int = 1000,
|
54
|
+
seed: Optional[int] = None,
|
55
|
+
no_label_sample_selection: Literal[
|
56
|
+
"max_average_difference", "max_nearest_difference"
|
57
|
+
] = "max_average_difference",
|
58
|
+
):
|
59
|
+
self.N: int = sanitize_param(N, 100, lambda x: x > 0)
|
60
|
+
self.aff_thresh: float = sanitize_param(aff_thresh, 0.1, lambda x: 0 < x < 1)
|
61
|
+
self.max_discards: float = sanitize_param(max_discards, 1000, lambda x: x > 0)
|
62
|
+
|
63
|
+
self.seed: Optional[int] = sanitize_seed(seed)
|
64
|
+
|
65
|
+
if self.seed is not None:
|
66
|
+
np.random.seed(seed)
|
67
|
+
|
68
|
+
self.no_label_sample_selection: str = sanitize_param(
|
69
|
+
no_label_sample_selection,
|
70
|
+
"max_average_difference",
|
71
|
+
lambda x: x == "nearest_difference",
|
72
|
+
)
|
73
|
+
|
74
|
+
self.classes: Union[npt.NDArray, list] = []
|
75
|
+
self._detectors: Optional[dict] = None
|
76
|
+
self._detectors_stack: Optional[npt.NDArray] = None
|
77
|
+
|
78
|
+
@property
|
79
|
+
def detectors(self) -> Optional[Dict[str, npt.NDArray[np.bool_]]]:
|
80
|
+
"""Returns the trained detectors, organized by class."""
|
81
|
+
return self._detectors
|
82
|
+
|
83
|
+
def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True) -> BNSA:
|
84
|
+
"""Training according to X and y, using the method negative selection method.
|
85
|
+
|
86
|
+
Parameters
|
87
|
+
----------
|
88
|
+
X : npt.NDArray
|
89
|
+
Training array, containing the samples and their characteristics, [``N samples`` (
|
90
|
+
rows)][``N features`` (columns)].
|
91
|
+
y : npt.NDArray
|
92
|
+
Array of target classes of ``X`` with [``N samples`` (lines)].
|
93
|
+
verbose : bool, default=True
|
94
|
+
Feedback from detector generation to the user.
|
95
|
+
|
96
|
+
Returns
|
97
|
+
-------
|
98
|
+
self : BNSA
|
99
|
+
Returns the instance it self.
|
100
|
+
"""
|
101
|
+
super()._check_and_raise_exceptions_fit(X, y, "BNSA")
|
102
|
+
# Converts the entire array X to boolean
|
103
|
+
X = X.astype(np.bool_)
|
104
|
+
|
105
|
+
# Identifying the possible classes within the output array `y`.
|
106
|
+
self.classes = np.unique(y)
|
107
|
+
# Dictionary that will store detectors with classes as keys.
|
108
|
+
list_detectors_by_class = {}
|
109
|
+
# Separates the classes for training.
|
110
|
+
sample_index: dict = self._slice_index_list_by_class(y)
|
111
|
+
# Progress bar for generating all detectors.
|
112
|
+
|
113
|
+
progress = tqdm(
|
114
|
+
total=int(self.N * (len(self.classes))),
|
115
|
+
bar_format="{desc} ┇{bar}┇ {n}/{total} detectors",
|
116
|
+
postfix="\n",
|
117
|
+
disable=not verbose
|
118
|
+
)
|
119
|
+
|
120
|
+
for _class_ in self.classes:
|
121
|
+
# Initializes the empty set that will contain the valid detectors.
|
122
|
+
valid_detectors_set: list = []
|
123
|
+
discard_count: int = 0
|
124
|
+
# Updating the progress bar with the current class the algorithm is processing.
|
125
|
+
progress.set_description_str(
|
126
|
+
f"Generating the detectors for the {_class_} class:"
|
127
|
+
)
|
128
|
+
x_class = X[sample_index[_class_]]
|
129
|
+
while len(valid_detectors_set) < self.N:
|
130
|
+
# Generates a candidate detector vector randomly with values 0 and 1.
|
131
|
+
vector_x = np.random.randint(0, 2, size=X.shape[1]).astype(np.bool_)
|
132
|
+
# If the detector is valid, add it to the list of valid detectors.
|
133
|
+
if check_detector_bnsa_validity(x_class, vector_x, self.aff_thresh):
|
134
|
+
discard_count = 0
|
135
|
+
valid_detectors_set.append(vector_x)
|
136
|
+
progress.update(1)
|
137
|
+
else:
|
138
|
+
discard_count += 1
|
139
|
+
if discard_count == self.max_discards:
|
140
|
+
raise MaxDiscardsReachedError(_class_)
|
141
|
+
|
142
|
+
# Add detectors to the dictionary with classes as keys.
|
143
|
+
list_detectors_by_class[_class_] = np.array(valid_detectors_set)
|
144
|
+
|
145
|
+
# Notify the completion of detector generation for the classes.
|
146
|
+
progress.set_description(
|
147
|
+
f"\033[92m✔ Non-self detectors for classes ({', '.join(map(str, self.classes))}) "
|
148
|
+
f"successfully generated\033[0m"
|
149
|
+
)
|
150
|
+
progress.close()
|
151
|
+
# Saves the found detectors in the attribute for the class detectors.
|
152
|
+
self._detectors = list_detectors_by_class
|
153
|
+
self._detectors_stack = np.array(
|
154
|
+
[np.stack(self._detectors[class_name]) for class_name in self.classes]
|
155
|
+
)
|
156
|
+
return self
|
157
|
+
|
158
|
+
def predict(self, X: npt.NDArray) -> Optional[npt.NDArray]:
|
159
|
+
"""Prediction of classes based on detectors created after training.
|
160
|
+
|
161
|
+
Parameters
|
162
|
+
----------
|
163
|
+
X : npt.NDArray
|
164
|
+
Array with input samples with [``N_samples`` (Lines)] and [``N_characteristics``(
|
165
|
+
Columns)]
|
166
|
+
|
167
|
+
Returns
|
168
|
+
-------
|
169
|
+
c : Optional[npt.NDArray]
|
170
|
+
an ndarray of the form ``C`` [``N samples``], containing the predicted classes for
|
171
|
+
``X``. Returns``None``: If there are no detectors for the prediction.
|
172
|
+
"""
|
173
|
+
# If there are no detectors, Returns None.
|
174
|
+
if self._detectors is None or self._detectors_stack is None:
|
175
|
+
return None
|
176
|
+
|
177
|
+
super()._check_and_raise_exceptions_predict(
|
178
|
+
X, len(self._detectors[self.classes[0]][0]), "BNSA"
|
179
|
+
)
|
180
|
+
|
181
|
+
# Converts the entire array X to boolean.
|
182
|
+
if X.dtype != bool:
|
183
|
+
X = X.astype(bool)
|
184
|
+
|
185
|
+
# Initializes an empty array that will store the predictions.
|
186
|
+
c = []
|
187
|
+
# For each sample row in X.
|
188
|
+
for line in X:
|
189
|
+
class_found: bool = True
|
190
|
+
# Class prediction based on detectors
|
191
|
+
class_index = bnsa_class_prediction(
|
192
|
+
line, self._detectors_stack, self.aff_thresh
|
193
|
+
)
|
194
|
+
# If belonging to one or more classes, adds the class with the greatest
|
195
|
+
# average distance
|
196
|
+
if class_index > -1:
|
197
|
+
c.append(self.classes[class_index])
|
198
|
+
class_found = True
|
199
|
+
else:
|
200
|
+
class_found = False
|
201
|
+
|
202
|
+
# If there is only one class and the sample is not classified, sets the
|
203
|
+
# output as non-self.
|
204
|
+
if not class_found and len(self.classes) == 1:
|
205
|
+
c.append("non-self")
|
206
|
+
# If the class cannot be identified by the detectors
|
207
|
+
elif not class_found:
|
208
|
+
self.__assign_class_to_non_self_sample(line, c)
|
209
|
+
|
210
|
+
return np.array(c)
|
211
|
+
|
212
|
+
def __assign_class_to_non_self_sample(self, line: npt.NDArray, c: list):
|
213
|
+
"""Determine the class of a sample when all detectors classify it as "non-self".
|
214
|
+
|
215
|
+
Classification is performed using the ``max_average_difference`` and
|
216
|
+
``max_nearest_difference`` methods.
|
217
|
+
|
218
|
+
Parameters
|
219
|
+
----------
|
220
|
+
line : npt.NDArray
|
221
|
+
Sample to be classified.
|
222
|
+
c : list
|
223
|
+
List of predictions to be updated with the new classification.
|
224
|
+
"""
|
225
|
+
if self._detectors is None:
|
226
|
+
raise ValueError("Detectors is not initialized.")
|
227
|
+
|
228
|
+
class_differences: dict = {}
|
229
|
+
for _class_ in self.classes:
|
230
|
+
distances = np.sum(line != self._detectors[_class_]) / self.N
|
231
|
+
# Assign the label to the class with the greatest distance from
|
232
|
+
# the nearest detector.
|
233
|
+
if self.no_label_sample_selection == "nearest_difference":
|
234
|
+
class_differences[_class_] = distances.min()
|
235
|
+
# Or based on the greatest distance from the average distances of the detectors.
|
236
|
+
else:
|
237
|
+
class_differences[_class_] = distances.sum() / self.N
|
238
|
+
|
239
|
+
c.append(max(class_differences, key=class_differences.get))
|
@@ -1,5 +1,6 @@
|
|
1
1
|
"""Negative Selection Algorithm."""
|
2
2
|
|
3
|
+
from __future__ import annotations
|
3
4
|
from typing import Any, Dict, Literal, Optional, Union
|
4
5
|
from tqdm import tqdm
|
5
6
|
|
@@ -7,11 +8,7 @@ import numpy as np
|
|
7
8
|
import numpy.typing as npt
|
8
9
|
|
9
10
|
from ..base import set_seed_numba
|
10
|
-
from ._ns_core import
|
11
|
-
check_detector_bnsa_validity,
|
12
|
-
bnsa_class_prediction,
|
13
|
-
check_detector_rnsa_validity,
|
14
|
-
)
|
11
|
+
from ._ns_core import check_detector_rnsa_validity
|
15
12
|
from ..exceptions import MaxDiscardsReachedError
|
16
13
|
from ..utils.distance import (
|
17
14
|
min_distance_to_class_vectors,
|
@@ -123,7 +120,7 @@ class RNSA(BaseNSA):
|
|
123
120
|
"""Returns the trained detectors, organized by class."""
|
124
121
|
return self._detectors
|
125
122
|
|
126
|
-
def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True) ->
|
123
|
+
def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True) -> RNSA:
|
127
124
|
"""
|
128
125
|
Perform training according to X and y, using the negative selection method (NegativeSelect).
|
129
126
|
|
@@ -339,10 +336,9 @@ class RNSA(BaseNSA):
|
|
339
336
|
if len(knn) < self.k:
|
340
337
|
knn.append(distance)
|
341
338
|
knn.sort()
|
342
|
-
|
343
339
|
# Otherwise, add the distance if the new distance is smaller than the largest
|
344
340
|
# distance in the list.
|
345
|
-
|
341
|
+
elif knn[self.k - 1] > distance:
|
346
342
|
knn[self.k - 1] = distance
|
347
343
|
knn.sort()
|
348
344
|
|
@@ -447,226 +443,3 @@ class RNSA(BaseNSA):
|
|
447
443
|
return False
|
448
444
|
|
449
445
|
return True, new_detector_r
|
450
|
-
|
451
|
-
|
452
|
-
class BNSA(BaseNSA):
|
453
|
-
"""BNSA (Binary Negative Selection Algorithm).
|
454
|
-
|
455
|
-
Class is for classification and identification purposes of anomalies through the self and not
|
456
|
-
self method.
|
457
|
-
|
458
|
-
Parameters
|
459
|
-
----------
|
460
|
-
N : int, default=100
|
461
|
-
Number of detectors.
|
462
|
-
aff_thresh : float, default=0.1
|
463
|
-
The variable represents the percentage of similarity between the T cell and the own
|
464
|
-
samples. The default value is 10% (0.1), while a value of 1.0 represents 100% similarity.
|
465
|
-
max_discards : int, default=1000
|
466
|
-
This parameter indicates the maximum number of detector discards in sequence, which aims
|
467
|
-
to avoid a possible infinite loop if a radius is defined that it is not possible to
|
468
|
-
generate non-self detectors.
|
469
|
-
seed : Optional[int], default=None
|
470
|
-
Seed for the random generation of values in the detectors.
|
471
|
-
no_label_sample_selection : str, default="max_average_difference"
|
472
|
-
Method for selecting labels for samples designated as non-self by all detectors.
|
473
|
-
Available method types:
|
474
|
-
|
475
|
-
- max_average_difference - Selects the class with the highest average difference among the
|
476
|
-
detectors.
|
477
|
-
|
478
|
-
- max_nearest_difference - Selects the class with the highest difference between the
|
479
|
-
nearest and farthest detector from the sample.
|
480
|
-
"""
|
481
|
-
|
482
|
-
def __init__(
|
483
|
-
self,
|
484
|
-
N: int = 100,
|
485
|
-
aff_thresh: float = 0.1,
|
486
|
-
max_discards: int = 1000,
|
487
|
-
seed: Optional[int] = None,
|
488
|
-
no_label_sample_selection: Literal[
|
489
|
-
"max_average_difference", "max_nearest_difference"
|
490
|
-
] = "max_average_difference",
|
491
|
-
):
|
492
|
-
self.N: int = sanitize_param(N, 100, lambda x: x > 0)
|
493
|
-
self.aff_thresh: float = sanitize_param(aff_thresh, 0.1, lambda x: 0 < x < 1)
|
494
|
-
self.max_discards: float = sanitize_param(max_discards, 1000, lambda x: x > 0)
|
495
|
-
|
496
|
-
self.seed: Optional[int] = sanitize_seed(seed)
|
497
|
-
|
498
|
-
if self.seed is not None:
|
499
|
-
np.random.seed(seed)
|
500
|
-
|
501
|
-
self.no_label_sample_selection: str = sanitize_param(
|
502
|
-
no_label_sample_selection,
|
503
|
-
"max_average_difference",
|
504
|
-
lambda x: x == "nearest_difference",
|
505
|
-
)
|
506
|
-
|
507
|
-
self.classes: Union[npt.NDArray, list] = []
|
508
|
-
self._detectors: Optional[dict] = None
|
509
|
-
self._detectors_stack: Optional[npt.NDArray] = None
|
510
|
-
|
511
|
-
@property
|
512
|
-
def detectors(self) -> Optional[Dict[str, npt.NDArray[np.bool_]]]:
|
513
|
-
"""Returns the trained detectors, organized by class."""
|
514
|
-
return self._detectors
|
515
|
-
|
516
|
-
def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True) -> "BNSA":
|
517
|
-
"""Training according to X and y, using the method negative selection method.
|
518
|
-
|
519
|
-
Parameters
|
520
|
-
----------
|
521
|
-
X : npt.NDArray
|
522
|
-
Training array, containing the samples and their characteristics, [``N samples`` (
|
523
|
-
rows)][``N features`` (columns)].
|
524
|
-
y : npt.NDArray
|
525
|
-
Array of target classes of ``X`` with [``N samples`` (lines)].
|
526
|
-
verbose : bool, default=True
|
527
|
-
Feedback from detector generation to the user.
|
528
|
-
|
529
|
-
Returns
|
530
|
-
-------
|
531
|
-
self : BNSA
|
532
|
-
Returns the instance it self.
|
533
|
-
"""
|
534
|
-
super()._check_and_raise_exceptions_fit(X, y, "BNSA")
|
535
|
-
# Converts the entire array X to boolean
|
536
|
-
X = X.astype(np.bool_)
|
537
|
-
|
538
|
-
# Identifying the possible classes within the output array `y`.
|
539
|
-
self.classes = np.unique(y)
|
540
|
-
# Dictionary that will store detectors with classes as keys.
|
541
|
-
list_detectors_by_class = {}
|
542
|
-
# Separates the classes for training.
|
543
|
-
sample_index: dict = self._slice_index_list_by_class(y)
|
544
|
-
# Progress bar for generating all detectors.
|
545
|
-
|
546
|
-
progress = tqdm(
|
547
|
-
total=int(self.N * (len(self.classes))),
|
548
|
-
bar_format="{desc} ┇{bar}┇ {n}/{total} detectors",
|
549
|
-
postfix="\n",
|
550
|
-
disable=not verbose
|
551
|
-
)
|
552
|
-
|
553
|
-
for _class_ in self.classes:
|
554
|
-
# Initializes the empty set that will contain the valid detectors.
|
555
|
-
valid_detectors_set: list = []
|
556
|
-
discard_count: int = 0
|
557
|
-
# Updating the progress bar with the current class the algorithm is processing.
|
558
|
-
progress.set_description_str(
|
559
|
-
f"Generating the detectors for the {_class_} class:"
|
560
|
-
)
|
561
|
-
x_class = X[sample_index[_class_]]
|
562
|
-
while len(valid_detectors_set) < self.N:
|
563
|
-
# Generates a candidate detector vector randomly with values 0 and 1.
|
564
|
-
vector_x = np.random.randint(0, 2, size=X.shape[1]).astype(np.bool_)
|
565
|
-
# If the detector is valid, add it to the list of valid detectors.
|
566
|
-
if check_detector_bnsa_validity(x_class, vector_x, self.aff_thresh):
|
567
|
-
discard_count = 0
|
568
|
-
valid_detectors_set.append(vector_x)
|
569
|
-
progress.update(1)
|
570
|
-
else:
|
571
|
-
discard_count += 1
|
572
|
-
if discard_count == self.max_discards:
|
573
|
-
raise MaxDiscardsReachedError(_class_)
|
574
|
-
|
575
|
-
# Add detectors to the dictionary with classes as keys.
|
576
|
-
list_detectors_by_class[_class_] = np.array(valid_detectors_set)
|
577
|
-
|
578
|
-
# Notify the completion of detector generation for the classes.
|
579
|
-
progress.set_description(
|
580
|
-
f"\033[92m✔ Non-self detectors for classes ({', '.join(map(str, self.classes))}) "
|
581
|
-
f"successfully generated\033[0m"
|
582
|
-
)
|
583
|
-
progress.close()
|
584
|
-
# Saves the found detectors in the attribute for the class detectors.
|
585
|
-
self._detectors = list_detectors_by_class
|
586
|
-
self._detectors_stack = np.array(
|
587
|
-
[np.stack(self._detectors[class_name]) for class_name in self.classes]
|
588
|
-
)
|
589
|
-
return self
|
590
|
-
|
591
|
-
def predict(self, X: npt.NDArray) -> Optional[npt.NDArray]:
|
592
|
-
"""Prediction of classes based on detectors created after training.
|
593
|
-
|
594
|
-
Parameters
|
595
|
-
----------
|
596
|
-
X : npt.NDArray
|
597
|
-
Array with input samples with [``N_samples`` (Lines)] and [``N_characteristics``(
|
598
|
-
Columns)]
|
599
|
-
|
600
|
-
Returns
|
601
|
-
-------
|
602
|
-
c : Optional[npt.NDArray]
|
603
|
-
an ndarray of the form ``C`` [``N samples``], containing the predicted classes for
|
604
|
-
``X``. Returns``None``: If there are no detectors for the prediction.
|
605
|
-
"""
|
606
|
-
# If there are no detectors, Returns None.
|
607
|
-
if self._detectors is None or self._detectors_stack is None:
|
608
|
-
return None
|
609
|
-
|
610
|
-
super()._check_and_raise_exceptions_predict(
|
611
|
-
X, len(self._detectors[self.classes[0]][0]), "BNSA"
|
612
|
-
)
|
613
|
-
|
614
|
-
# Converts the entire array X to boolean.
|
615
|
-
if X.dtype != bool:
|
616
|
-
X = X.astype(bool)
|
617
|
-
|
618
|
-
# Initializes an empty array that will store the predictions.
|
619
|
-
c = []
|
620
|
-
# For each sample row in X.
|
621
|
-
for line in X:
|
622
|
-
class_found: bool = True
|
623
|
-
# Class prediction based on detectors
|
624
|
-
class_index = bnsa_class_prediction(
|
625
|
-
line, self._detectors_stack, self.aff_thresh
|
626
|
-
)
|
627
|
-
# If belonging to one or more classes, adds the class with the greatest
|
628
|
-
# average distance
|
629
|
-
if class_index > -1:
|
630
|
-
c.append(self.classes[class_index])
|
631
|
-
class_found = True
|
632
|
-
else:
|
633
|
-
class_found = False
|
634
|
-
|
635
|
-
# If there is only one class and the sample is not classified, sets the
|
636
|
-
# output as non-self.
|
637
|
-
if not class_found and len(self.classes) == 1:
|
638
|
-
c.append("non-self")
|
639
|
-
# If the class cannot be identified by the detectors
|
640
|
-
elif not class_found:
|
641
|
-
self.__assign_class_to_non_self_sample(line, c)
|
642
|
-
|
643
|
-
return np.array(c)
|
644
|
-
|
645
|
-
def __assign_class_to_non_self_sample(self, line: npt.NDArray, c: list):
|
646
|
-
"""Determine the class of a sample when all detectors classify it as "non-self".
|
647
|
-
|
648
|
-
Classification is performed using the ``max_average_difference`` and
|
649
|
-
``max_nearest_difference`` methods.
|
650
|
-
|
651
|
-
Parameters
|
652
|
-
----------
|
653
|
-
line : npt.NDArray
|
654
|
-
Sample to be classified.
|
655
|
-
c : list
|
656
|
-
List of predictions to be updated with the new classification.
|
657
|
-
"""
|
658
|
-
if self._detectors is None:
|
659
|
-
raise ValueError("Detectors is not initialized.")
|
660
|
-
|
661
|
-
class_differences: dict = {}
|
662
|
-
for _class_ in self.classes:
|
663
|
-
distances = np.sum(line != self._detectors[_class_]) / self.N
|
664
|
-
# Assign the label to the class with the greatest distance from
|
665
|
-
# the nearest detector.
|
666
|
-
if self.no_label_sample_selection == "nearest_difference":
|
667
|
-
class_differences[_class_] = distances.min()
|
668
|
-
# Or based on the greatest distance from the average distances of the detectors.
|
669
|
-
else:
|
670
|
-
class_differences[_class_] = distances.sum() / self.N
|
671
|
-
|
672
|
-
c.append(max(class_differences, key=class_differences.get))
|
aisp-0.3.0/aisp/__init__.py
DELETED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|