aisp 0.1.42__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aisp/base/_classifier.py +6 -2
- aisp/base/mutation.py +130 -0
- aisp/csa/__init__.py +9 -0
- aisp/csa/_ai_immune_recognition_sys.py +498 -0
- aisp/csa/_base.py +104 -0
- aisp/csa/_cell.py +61 -0
- aisp/exceptions.py +17 -1
- aisp/nsa/_base.py +3 -3
- aisp/nsa/_negative_selection.py +43 -45
- aisp/utils/types.py +31 -0
- aisp/utils/validation.py +47 -0
- {aisp-0.1.42.dist-info → aisp-0.2.1.dist-info}/METADATA +14 -16
- aisp-0.2.1.dist-info/RECORD +25 -0
- {aisp-0.1.42.dist-info → aisp-0.2.1.dist-info}/WHEEL +1 -1
- aisp-0.1.42.dist-info/RECORD +0 -18
- {aisp-0.1.42.dist-info → aisp-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {aisp-0.1.42.dist-info → aisp-0.2.1.dist-info}/top_level.txt +0 -0
aisp/base/_classifier.py
CHANGED
@@ -16,10 +16,10 @@ class BaseClassifier(ABC):
     ``get_params`` method.
     """
 
-    classes:
+    classes: Union[npt.NDArray, list] = []
 
     @abstractmethod
-    def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True):
+    def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True) -> "BaseClassifier":
         """
         Train the model using the input data X and corresponding labels y.
 
@@ -83,6 +83,10 @@ class BaseClassifier(ABC):
         if len(y) == 0:
             return 0
         y_pred = self.predict(X)
+
+        if y_pred is None:
+            return 0
+
         return accuracy_score(y, y_pred)
 
     def _slice_index_list_by_class(self, y: npt.NDArray) -> dict:
aisp/base/mutation.py
ADDED
@@ -0,0 +1,130 @@
+"""
+The functions utilize Numba decorators for Just-In-Time compilation.
+
+Contains functions that generate sets of mutated clones from continuous or binary vectors,
+simulating the clonal expansion process in artificial immune systems.
+"""
+
+import numpy as np
+import numpy.typing as npt
+from numba import njit, types
+
+
+@njit([(types.float64[:], types.int64)], cache=True)
+def clone_and_mutate_continuous(
+    vector: npt.NDArray[np.float64],
+    n: int
+) -> npt.NDArray[np.float64]:
+    """
+    Generate a set of mutated clones from a cell represented by a continuous vector.
+
+    This function creates `n` clones of the input vector and applies random mutations to each of
+    them, simulating the process of clonal expansion in artificial immune systems. Each clone
+    will have a random number of mutations applied in distinct positions of the original vector.
+
+    Parameters
+    ----------
+    vector : npt.NDArray[np.float64]
+        The original immune cell with continuous values to be cloned and mutated.
+    n : int
+        The number of mutated clones to be generated.
+
+    Returns
+    -------
+    clone_set : npt.NDArray
+        An Array(n, len(vector)) containing the `n` mutated clones of the original vector.
+    """
+    n_features = vector.shape[0]
+    clone_set = np.empty((n, n_features), dtype=np.float64)
+    for i in range(n):
+        n_mutations = np.random.randint(1, n_features)
+        clone = vector.copy()
+        position_mutations = np.random.permutation(n_features)[:n_mutations]
+        for j in range(n_mutations):
+            idx = position_mutations[j]
+            clone[idx] = np.float64(np.random.random())
+        clone_set[i] = clone
+
+    return clone_set
+
+
+@njit([(types.boolean[:], types.int64)], cache=True)
+def clone_and_mutate_binary(
+    vector: npt.NDArray[np.bool_],
+    n: int
+) -> npt.NDArray[np.bool_]:
+    """
+    Generate a set of mutated clones from a cell represented by a binary vector.
+
+    This function creates `n` clones of the input vector and applies random mutations to each of
+    them, changing some bits randomly. The process simulates clonal expansion in artificial
+    immune systems with discrete representations.
+
+    Parameters
+    ----------
+    vector : npt.NDArray[np.bool_]
+        The original immune cell with binary values to be cloned and mutated.
+    n : int
+        The number of mutated clones to be generated.
+
+    Returns
+    -------
+    clone_set : npt.NDArray[np.bool_]
+        An Array(n, len(vector)) containing the `n` mutated clones of the original vector.
+    """
+    n_features = vector.shape[0]
+    clone_set = np.empty((n, n_features), dtype=np.bool_)
+    for i in range(n):
+        n_mutations = np.random.randint(1, n_features)
+        clone = vector.copy()
+        position_mutations = np.random.permutation(n_features)[:n_mutations]
+        for j in range(n_mutations):
+            idx = position_mutations[j]
+            clone[idx] = np.bool_(np.random.randint(0, 2))
+        clone_set[i] = clone
+
+    return clone_set
+
+
+@njit([(types.float64[:], types.int64, types.float64[:, :])], cache=True)
+def clone_and_mutate_ranged(
+    vector: npt.NDArray[np.float64],
+    n: int,
+    bounds: npt.NDArray[np.float64]
+) -> npt.NDArray[np.float64]:
+    """
+    Generate a set of mutated clones from a cell represented by custom ranges per dimension.
+
+    This function creates `n` clones of the input vector and applies random mutations to each of
+    them, simulating the process of clonal expansion in artificial immune systems. Each clone
+    will have a random number of mutations applied in distinct positions of the original vector.
+
+    Parameters
+    ----------
+    vector : npt.NDArray[np.float64]
+        The original immune cell with continuous values to be cloned and mutated.
+    n : int
+        The number of mutated clones to be generated.
+    bounds : np.ndarray
+        Array (n_features, 2) with min and max per dimension.
+
+    Returns
+    -------
+    clone_set : npt.NDArray
+        An Array(n, len(vector)) containing the `n` mutated clones of the original vector.
+    """
+    n_features = vector.shape[0]
+    clone_set = np.empty((n, n_features), dtype=np.float64)
+
+    for i in range(n):
+        n_mutations = np.random.randint(1, n_features)
+        clone = vector.copy()
+        position_mutations = np.random.permutation(n_features)[:n_mutations]
+        for j in range(n_mutations):
+            idx = position_mutations[j]
+            min_limit = bounds[idx, 0]
+            max_limit = bounds[idx, 1]
+            clone[idx] = np.random.uniform(min_limit, max_limit)
+        clone_set[i] = clone
+
+    return clone_set
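For orientation, a minimal sketch of calling these JIT helpers, assuming the module path `aisp.base.mutation` from this diff; the three- and four-feature vectors are invented, and dtypes must match the `@njit` signatures exactly (float64 for the continuous variant, bool for the binary one):

    import numpy as np
    from aisp.base.mutation import clone_and_mutate_binary, clone_and_mutate_continuous

    # A normalized continuous cell with 3 features; dtype must be float64.
    cell = np.array([0.2, 0.5, 0.9], dtype=np.float64)
    clones = clone_and_mutate_continuous(cell, 5)
    print(clones.shape)  # (5, 3): five clones, each mutated in 1..2 random positions

    # The binary variant expects a boolean vector and flips random bits.
    bits = np.array([True, False, True, True], dtype=np.bool_)
    bit_clones = clone_and_mutate_binary(bits, 5)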
aisp/csa/__init__.py
ADDED
@@ -0,0 +1,9 @@
+"""Module (CSA) Clonal Selection Algorithm.
+
+CSAs are inspired by the process of antibody proliferation upon detecting an antigen, during which
+the generated antibodies undergo mutations in an attempt to enhance pathogen recognition.
+"""
+from ._ai_immune_recognition_sys import AIRS
+
+__author__ = 'João Paulo da Silva Barros'
+__all__ = ['AIRS']
aisp/csa/_ai_immune_recognition_sys.py
ADDED
@@ -0,0 +1,498 @@
+"""Artificial Immune Recognition System (AIRS)."""
+
+import random
+from collections import Counter
+from heapq import nlargest
+from operator import attrgetter
+from typing import List, Optional, Dict
+
+import numpy as np
+import numpy.typing as npt
+from scipy.spatial.distance import pdist
+from tqdm import tqdm
+
+
+from ._cell import Cell
+from ..utils.sanitizers import sanitize_param, sanitize_seed, sanitize_choice
+from ..utils.distance import hamming, compute_metric_distance, get_metric_code
+from ..utils.types import FeatureType, MetricType
+from ..utils.validation import detect_vector_data_type
+from ._base import BaseAIRS
+
+
+class _ARB(Cell):
+    """ARB (Artificial recognition ball).
+
+    Individual from the set of recognizing cells (ARB); inherits characteristics from a B-cell,
+    adding resource consumption.
+
+    Parameters
+    ----------
+    vector : npt.NDArray
+        A vector of cell features.
+    stimulation : Optional[float], default=None
+        The rate at which the cell stimulates antigens.
+    """
+
+    def __init__(
+        self,
+        vector: npt.NDArray,
+        stimulation: Optional[float] = None
+    ) -> None:
+        super().__init__(vector)
+        self.resource: float = 0.0
+        if stimulation is not None:
+            self.stimulation: float = stimulation
+
+    def consume_resource(self, n_resource: float, amplified: float = 1) -> float:
+        """
+        Update the amount of resources available for an ARB after consumption.
+
+        This function consumes the resources and returns the remaining amount of resources after
+        consumption.
+
+        Parameters
+        ----------
+        n_resource : float
+            Amount of resources.
+        amplified : float
+            Amplifier for the resource consumption by the cell. It is multiplied by the cell's
+            stimulus. The default value is 1.
+
+        Returns
+        -------
+        n_resource : float
+            The remaining amount of resources after consumption.
+        """
+        consumption = self.stimulation * amplified
+        n_resource -= consumption
+        if n_resource < 0:
+            return 0
+
+        self.resource = consumption
+        return n_resource
+
+    def to_cell(self) -> Cell:
+        """Convert this _ARB into a pure Cell object."""
+        return Cell(self.vector)
+
+
+class AIRS(BaseAIRS):
+    """Artificial Immune Recognition System (AIRS).
+
+    The Artificial Immune Recognition System (AIRS) is a classification algorithm inspired by the
+    clonal selection process of the biological immune system. This implementation is based on the
+    simplified AIRS2 version described in [1]_. The algorithm has been adapted to support both
+    real-valued (continuous) and binary feature datasets.
+
+    Parameters
+    ----------
+    n_resources : float, default=10
+        Total amount of available resources.
+    rate_clonal : int, default=10
+        Maximum number of possible clones of a class. This quantity is multiplied by
+        (cell_stimulus * rate_hypermutation) to define the number of clones.
+    rate_mc_init : float, default=0.2
+        Percentage of samples used to initialize memory cells.
+    rate_hypermutation : float, default=0.75
+        The rate of mutated clones derived from rate_clonal as a scalar factor.
+    affinity_threshold_scalar : float, default=0.75
+        Normalized affinity threshold.
+    k : int, default=3
+        The number of K nearest neighbors that will be used to choose a label in the prediction.
+    max_iters : int, default=100
+        Maximum number of iterations in the refinement process of the ARB set exposed to aᵢ.
+    resource_amplified : float, default=1.0
+        Resource consumption amplifier; it is multiplied with the stimulation to subtract
+        resources. Defaults to 1.0 (no amplification).
+    metric : Literal["manhattan", "minkowski", "euclidean"], default="euclidean"
+        Way to calculate the distance between the detector and the sample:
+
+        * ``'euclidean'`` ➜ The calculation of the distance is given by the expression:
+          √((X₁ – Y₁)² + (X₂ – Y₂)² + ... + (Xₙ – Yₙ)²).
+
+        * ``'minkowski'`` ➜ The calculation of the distance is given by the expression:
+          (|X₁ – Y₁|ᵖ + |X₂ – Y₂|ᵖ + ... + |Xₙ – Yₙ|ᵖ)¹/ₚ.
+
+        * ``'manhattan'`` ➜ The calculation of the distance is given by the expression:
+          (|X₁ – Y₁| + |X₂ – Y₂| + ... + |Xₙ – Yₙ|).
+
+    seed : int
+        Seed for the random generation of detector values. Defaults to None.
+
+    **kwargs
+        p : float
+            This parameter stores the value of ``p`` used in the Minkowski distance. The default
+            is ``2``, which represents normalized Euclidean distance.
+            Different values of p lead to different variants of the Minkowski distance.
+
+    Notes
+    -----
+    This implementation is inspired by AIRS2, a simplified version of the original AIRS
+    algorithm, introducing adaptations to handle continuous and binary datasets.
+
+    Based on Algorithm 16.5 from Brabazon et al. [1]_.
+
+    Related and noteworthy works: access here [2]_.
+
+    References
+    ----------
+    .. [1] Brabazon, A., O'Neill, M., & McGarraghy, S. (2015). Natural Computing Algorithms. In
+       Natural Computing Series. Springer Berlin Heidelberg.
+       https://doi.org/10.1007/978-3-662-43631-8
+
+    .. [2] AZZOUG, Aghiles. Artificial Immune Recognition System V2.
+       Available at: https://github.com/AghilesAzzoug/Artificial-Immune-System
+    """
+
+    def __init__(
+        self,
+        n_resources: float = 10,
+        rate_clonal: int = 10,
+        rate_mc_init: float = 0.2,
+        rate_hypermutation: float = 0.75,
+        affinity_threshold_scalar: float = 0.75,
+        k: int = 3,
+        max_iters: int = 100,
+        resource_amplified: float = 1.0,
+        metric: MetricType = "euclidean",
+        seed: Optional[int] = None,
+        **kwargs,
+    ) -> None:
+        self.n_resources: float = sanitize_param(n_resources, 10, lambda x: x >= 1)
+        self.rate_mc_init: float = sanitize_param(
+            rate_mc_init, 0.2, lambda x: 0 < x <= 1
+        )
+        self.rate_clonal: int = sanitize_param(rate_clonal, 10, lambda x: x > 0)
+        self.rate_hypermutation: float = sanitize_param(
+            rate_hypermutation, 0.75, lambda x: x > 0
+        )
+        self.affinity_threshold_scalar: float = sanitize_param(
+            affinity_threshold_scalar, 0.75, lambda x: x > 0
+        )
+        self.resource_amplified: float = sanitize_param(
+            resource_amplified, 1, lambda x: x > 1
+        )
+        self.k: int = sanitize_param(k, 3, lambda x: x > 3)
+        self.max_iters: int = sanitize_param(max_iters, 100, lambda x: x > 0)
+        self.seed: Optional[int] = sanitize_seed(seed)
+        if self.seed is not None:
+            np.random.seed(self.seed)
+
+        self._feature_type: FeatureType = "continuous-features"
+
+        self.metric = sanitize_choice(
+            metric, ["manhattan", "minkowski"], "euclidean"
+        )
+
+        self.p: np.float64 = np.float64(kwargs.get("p", 2.0))
+
+        self._cells_memory = None
+        self.affinity_threshold = 0.0
+        self.classes = []
+        self._bounds: Optional[npt.NDArray[np.float64]] = None
+
+    @property
+    def cells_memory(self) -> Optional[Dict[str, list[Cell]]]:
+        """Returns the trained memory cells, organized by class."""
+        return self._cells_memory
+
+    def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True) -> "AIRS":
+        """
+        Fit the model to the training data using AIRS.
+
+        The function ``fit(...)`` performs the training according to ``X`` and ``y``, using the
+        AIRS method.
+
+        Parameters
+        ----------
+        X : npt.NDArray
+            Training array, containing the samples and their characteristics,
+            [``N samples`` (rows)][``N features`` (columns)].
+        y : npt.NDArray
+            Array of target classes of ``X`` with [``N samples`` (lines)].
+        verbose : bool
+            Feedback on which sample aᵢ the memory cells are being generated for.
+
+        Returns
+        -------
+        AIRS
+            Returns the instance itself.
+        """
+        progress = None
+
+        self._feature_type = detect_vector_data_type(X)
+
+        super()._check_and_raise_exceptions_fit(X, y)
+
+        match self._feature_type:
+            case "binary-features":
+                X = X.astype(np.bool_)
+                self.metric = "hamming"
+            case "ranged-features":
+                self._bounds = np.vstack([np.min(X, axis=0), np.max(X, axis=0)])
+
+        self.classes = np.unique(y)
+        sample_index = self._slice_index_list_by_class(y)
+        if verbose:
+            progress = tqdm(
+                total=len(y),
+                postfix="\n",
+                bar_format="{desc} ┇{bar}┇ {n}/{total} memory cells for each aᵢ",
+            )
+        pool_cells_classes = {}
+        for _class_ in self.classes:
+            if verbose and progress is not None:
+                progress.set_description_str(
+                    f"Generating the memory cells for the {_class_} class:"
+                )
+
+            x_class = X[sample_index[_class_]]
+            # Calculating the similarity threshold between antigens
+            self._cells_affinity_threshold(x_class)
+            sufficiently_similar = (
+                self.affinity_threshold * self.affinity_threshold_scalar
+            )
+            # Initialize memory cells for a class.
+            pool_c: list[Cell] = self._init_memory_c(x_class)
+
+            for ai in x_class:
+                # Calculating the stimulation of memory cells with aᵢ and selecting the largest
+                # stimulation from the memory set.
+                c_match = pool_c[0]
+                match_stimulation = -1
+                for cell in pool_c:
+                    stimulation = self._affinity(cell.vector, ai)
+                    if stimulation > match_stimulation:
+                        match_stimulation = stimulation
+                        c_match = cell
+
+                arb_list: list[_ARB] = [
+                    _ARB(
+                        vector=c_match.vector,
+                        stimulation=match_stimulation
+                    )
+                ]
+
+                set_clones: npt.NDArray = c_match.hyper_clonal_mutate(
+                    int(self.rate_hypermutation * self.rate_clonal * match_stimulation),
+                    self._feature_type
+                )
+
+                for clone in set_clones:
+                    arb_list.append(
+                        _ARB(
+                            vector=clone,
+                            stimulation=self._affinity(clone, ai),
+                        )
+                    )
+
+                c_candidate = self._refinement_arb(ai, match_stimulation, arb_list)
+
+                if c_candidate.stimulation > match_stimulation:
+                    pool_c.append(c_candidate.to_cell())
+                    if self._affinity(c_candidate.vector, c_match.vector) < sufficiently_similar:
+                        pool_c.remove(c_match)
+
+                if verbose and progress is not None:
+                    progress.update(1)
+            pool_cells_classes[_class_] = pool_c
+
+        if verbose and progress is not None:
+            progress.set_description(
+                f"\033[92m✔ Set of memory cells for classes ({', '.join(map(str, self.classes))}) "
+                f"successfully generated\033[0m"
+            )
+        self._cells_memory = pool_cells_classes
+        return self
+
+    def predict(self, X: npt.NDArray) -> Optional[npt.NDArray]:
+        """
+        Predict class labels based on the memory cells created during training.
+
+        This method uses the trained memory cells to perform classification of the input data
+        using the k-nearest neighbors approach.
+
+        Parameters
+        ----------
+        X : npt.NDArray
+            Array with input samples with [``N samples`` (rows)] and [``N features`` (columns)].
+
+        Returns
+        -------
+        C : npt.NDArray or None
+            An ndarray of the form ``C`` [``N samples``], containing the predicted classes for
+            ``X``, or ``None`` if there are no detectors for the prediction.
+        """
+        if self._cells_memory is None:
+            return None
+
+        super()._check_and_raise_exceptions_predict(
+            X, len(self._cells_memory[self.classes[0]][0].vector), self._feature_type
+        )
+
+        c: list = []
+
+        all_cells_memory = [
+            (class_name, cell.vector)
+            for class_name in self.classes
+            for cell in self._cells_memory[class_name]
+        ]
+
+        for line in X:
+            label_stim_list = [
+                (class_name, self._affinity(memory, line))
+                for class_name, memory in all_cells_memory
+            ]
+            # Create the list with the k nearest neighbors and select the class with the most votes
+            k_nearest = nlargest(self.k, label_stim_list, key=lambda x: x[1])
+            votes = Counter(label for label, _ in k_nearest)
+            c.append(votes.most_common(1)[0][0])
+        return np.array(c)
+
+    def _refinement_arb(
+        self, ai: npt.NDArray, c_match_stimulation: float, arb_list: List[_ARB]
+    ) -> _ARB:
+        """
+        Refine the ARB set until the average stimulation exceeds the defined threshold.
+
+        This method iteratively refines the ARB set by comparing the average stimulation
+        against the `affinity_threshold_scalar`. Refinement continues through multiple iterations
+        until the threshold is met or exceeded.
+
+        Parameters
+        ----------
+        ai : npt.NDArray
+            The current antigen.
+        c_match_stimulation : float
+            The highest stimulation relative to aᵢ.
+        arb_list : List[_ARB]
+            ARB set.
+
+        Returns
+        -------
+        _ARB
+            The ARB cell with the highest stimulation.
+
+        Notes
+        -----
+        Based on Algorithm 16.6 from Brabazon et al. [1]_.
+
+        References
+        ----------
+        .. [1] Brabazon, A., O'Neill, M., & McGarraghy, S. (2015).
+           Natural Computing Algorithms. Natural Computing Series.
+           Springer Berlin Heidelberg. https://doi.org/10.1007/978-3-662-43631-8
+        """
+        iters = 0
+        while True:
+            iters += 1
+            arb_list.sort(key=attrgetter("stimulation"), reverse=True)
+            resource = self.n_resources
+            for arb in arb_list:
+                resource = arb.consume_resource(
+                    n_resource=resource, amplified=self.resource_amplified
+                )
+                if resource == 0:
+                    break
+            # remove cells without resources and calculate the average ARB stimulus.
+            arb_list = [cell for cell in arb_list if cell.resource > 0]
+            if not arb_list:
+                break
+            avg_stimulation = sum(item.stimulation for item in arb_list) / len(arb_list)
+
+            if iters == self.max_iters or avg_stimulation > self.affinity_threshold:
+                break
+
+            # pick a random cell for mutations.
+            random_index = random.randint(0, len(arb_list) - 1)
+            clone_arb = arb_list[random_index].hyper_clonal_mutate(
+                int(self.rate_clonal * c_match_stimulation),
+                self._feature_type
+            )
+
+            arb_list = [
+                _ARB(
+                    vector=clone,
+                    stimulation=self._affinity(clone, ai)
+                )
+                for clone in clone_arb
+            ]
+
+        return max(arb_list, key=attrgetter("stimulation"))
+
+    def _cells_affinity_threshold(self, antigens_list: npt.NDArray):
+        """
+        Calculate the affinity threshold based on the average affinity between training instances.
+
+        This function calculates the affinity threshold based on the average affinity between
+        training instances, where aᵢ and aⱼ are a pair of antigens, and affinity
+        is measured by distance (Euclidean, Manhattan, Minkowski, Hamming).
+        Following the formula:
+
+        > affinity_threshold = (Σᵢ₌₁ⁿ⁻¹ Σⱼ₌ᵢ₊₁ⁿ affinity(aᵢ, aⱼ)) / (n(n-1)/2)
+
+        Parameters
+        ----------
+        antigens_list : npt.NDArray
+            List of training antigens.
+        """
+        if self._feature_type == "binary-features":
+            distances = pdist(antigens_list, metric="hamming")
+        else:
+            metric_kwargs = {'p': self.p} if self.metric == 'minkowski' else {}
+            distances = pdist(antigens_list, metric=self.metric, **metric_kwargs)
+
+        n = antigens_list.shape[0]
+        sum_affinity = np.sum(1.0 - (distances / (1.0 + distances)))
+        self.affinity_threshold = 1.0 - (sum_affinity / ((n * (n - 1)) / 2))
+
+    def _affinity(self, u: npt.NDArray, v: npt.NDArray) -> float:
+        """
+        Calculate the stimulus between two vectors using metrics.
+
+        Parameters
+        ----------
+        u : npt.NDArray
+            Coordinates of the first point.
+        v : npt.NDArray
+            Coordinates of the second point.
+
+        Returns
+        -------
+        float
+            The stimulus rate between the vectors.
+        """
+        distance: float
+        if self._feature_type == "binary-features":
+            distance = hamming(u, v)
+        else:
+            distance = compute_metric_distance(
+                u, v, get_metric_code(self.metric), self.p
+            )
+        return 1 - (distance / (1 + distance))
+
+    def _init_memory_c(self, antigens_list: npt.NDArray) -> List[Cell]:
+        """
+        Initialize memory cells by randomly selecting `rate_mc_init` antigens.
+
+        Parameters
+        ----------
+        antigens_list : npt.NDArray
+            List of training antigens.
+
+        Returns
+        -------
+        List[Cell]
+            List of initialized memory cells.
+        """
+        n = antigens_list.shape[0]
+        n_cells = int(n * self.rate_mc_init)
+
+        if n == 0 or n_cells == 0:
+            return []
+
+        permutation = np.random.permutation(n)
+        selected = antigens_list[permutation[:n_cells]]
+        return [Cell(ai) for ai in selected]
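For context, a sketch of the workflow this class exposes, using invented toy data; `AIRS` is re-exported from `aisp.csa` per the `__init__.py` above, and the exact results depend on the stochastic clonal expansion:

    import numpy as np
    from aisp.csa import AIRS

    # Toy normalized continuous dataset: 20 samples, 4 features, 2 classes.
    np.random.seed(0)
    X = np.random.random((20, 4))
    y = np.array([0] * 10 + [1] * 10)

    model = AIRS(k=3, rate_mc_init=0.2, seed=123)
    model.fit(X, y, verbose=False)   # builds the per-class memory-cell pools
    predictions = model.predict(X)   # k-NN vote over all memory cells
    print((predictions == y).mean())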
aisp/csa/_base.py
ADDED
@@ -0,0 +1,104 @@
+"""Base class for Clonal Selection Algorithms."""
+
+from abc import ABC
+
+import numpy as np
+import numpy.typing as npt
+
+from ..exceptions import FeatureDimensionMismatch
+from ..utils.types import FeatureType
+from ..base import BaseClassifier
+
+
+class BaseAIRS(BaseClassifier, ABC):
+    """
+    Base class for the AIRS algorithm.
+
+    The base class contains functions that are used by more than one class in the package, and
+    are therefore considered essential for the overall functioning of the system.
+    """
+
+    @staticmethod
+    def _check_and_raise_exceptions_fit(
+        X: npt.NDArray,
+        y: npt.NDArray
+    ):
+        """
+        Verify the fit parameters and throw exceptions if the verification fails.
+
+        Parameters
+        ----------
+        X : npt.NDArray
+            Training array, containing the samples and their characteristics,
+            [``N samples`` (rows)][``N features`` (columns)].
+        y : npt.NDArray
+            Array of target classes of ``X`` with [``N samples`` (lines)].
+
+        Raises
+        ------
+        TypeError
+            If X or y are not ndarrays or have incompatible shapes.
+        """
+        if not isinstance(X, np.ndarray):
+            if isinstance(X, list):
+                X = np.array(X)
+            else:
+                raise TypeError("X is not an ndarray or list.")
+        elif not isinstance(y, np.ndarray):
+            if isinstance(y, list):
+                y = np.array(y)
+            else:
+                raise TypeError("y is not an ndarray or list.")
+        if X.shape[0] != y.shape[0]:
+            raise TypeError(
+                "X does not have the same number of samples as the output classes in y."
+            )
+
+
+    @staticmethod
+    def _check_and_raise_exceptions_predict(
+        X: npt.NDArray,
+        expected: int = 0,
+        feature_type: FeatureType = "continuous-features"
+    ) -> None:
+        """
+        Verify the predict parameters and throw exceptions if the verification fails.
+
+        Parameters
+        ----------
+        X : npt.NDArray
+            Input array for prediction, containing the samples and their characteristics,
+            [``N samples`` (rows)][``N features`` (columns)].
+        expected : int, default=0
+            Expected number of features per sample (columns in X).
+        feature_type : FeatureType, default="continuous-features"
+            Specifies the type of features to use, depending on whether the input data has
+            continuous or binary features.
+
+        Raises
+        ------
+        TypeError
+            If X is not an ndarray or list.
+        FeatureDimensionMismatch
+            If the number of features in X does not match the expected number.
+        ValueError
+            If feature_type is binary-features and X contains values that are not composed only
+            of 0 and 1.
+        """
+        if not isinstance(X, (np.ndarray, list)):
+            raise TypeError("X is not an ndarray or list")
+        if expected != len(X[0]):
+            raise FeatureDimensionMismatch(
+                expected,
+                len(X[0]),
+                "X"
+            )
+
+        if feature_type != "binary-features":
+            return
+
+        # Checks if matrix X contains only binary samples. Otherwise, raises an exception.
+        if not np.isin(X, [0, 1]).all():
+            raise ValueError(
+                "The array X contains values that are not composed only of 0 and 1."
+            )
aisp/csa/_cell.py
ADDED
@@ -0,0 +1,61 @@
+"""Represents a memory B-cell."""
+
+from dataclasses import dataclass
+from typing import Optional
+
+import numpy as np
+import numpy.typing as npt
+
+from ..base.mutation import (
+    clone_and_mutate_continuous,
+    clone_and_mutate_binary,
+    clone_and_mutate_ranged
+)
+from ..utils.types import FeatureType
+
+
+@dataclass(slots=True)
+class Cell:
+    """
+    Represents a memory B-cell.
+
+    Attributes
+    ----------
+    vector : npt.NDArray
+        A vector of cell features.
+    """
+
+    vector: np.ndarray
+
+    def hyper_clonal_mutate(
+        self,
+        n: int,
+        feature_type: FeatureType = "continuous-features",
+        bounds: Optional[npt.NDArray[np.float64]] = None
+    ) -> npt.NDArray:
+        """
+        Clones N features from a cell's features, generating a set of mutated vectors.
+
+        Parameters
+        ----------
+        n : int
+            Number of clones to be generated from mutations of the original cell.
+        feature_type : Literal["binary-features", "continuous-features", "ranged-features"]
+            Specifies the feature type to use based on the nature of the input features.
+        bounds : np.ndarray
+            Array (n_features, 2) with min and max per dimension.
+
+        Returns
+        -------
+        npt.NDArray
+            An array containing N mutated vectors from the original cell.
+        """
+        if feature_type == "binary-features":
+            return clone_and_mutate_binary(self.vector, n)
+        if feature_type == "ranged-features" and bounds is not None:
+            clone_and_mutate_ranged(self.vector, n, bounds)
+        return clone_and_mutate_continuous(self.vector, n)
+
+    def __eq__(self, other):
+        """Check if two cells are equal."""
+        return np.array_equal(self.vector, other.vector)
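A brief sketch of the dispatch above, assuming the private module path `aisp.csa._cell` is imported directly; the vectors are invented, and the example sticks to the binary and continuous branches:

    import numpy as np
    from aisp.csa._cell import Cell

    # Continuous cells dispatch to clone_and_mutate_continuous.
    cell = Cell(np.array([0.1, 0.4, 0.8], dtype=np.float64))
    clones = cell.hyper_clonal_mutate(4, "continuous-features")
    print(clones.shape)  # (4, 3)

    # Binary cells dispatch to clone_and_mutate_binary instead.
    bcell = Cell(np.array([True, False, True], dtype=np.bool_))
    bclones = bcell.hyper_clonal_mutate(4, "binary-features")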
aisp/exceptions.py
CHANGED
@@ -1,5 +1,7 @@
 """Custom warnings and errors."""
 
+from typing import Optional
+
 
 class MaxDiscardsReachedError(Exception):
     """Exception thrown when the maximum number of detector discards is reached."""
@@ -27,7 +29,7 @@ class FeatureDimensionMismatch(Exception):
         self,
         expected: int,
         received: int,
-        variable_name: str = None
+        variable_name: Optional[str] = None
     ):
         parts = []
         if variable_name:
@@ -41,3 +43,17 @@ class FeatureDimensionMismatch(Exception):
             "and matches the expected shape for the model."
         )
         super().__init__(message)
+
+
+class UnsupportedTypeError(Exception):
+    """
+    Exception raised when the input vector type is not supported.
+
+    This exception is thrown when the vector data type does not match any of the supported types.
+    """
+
+    def __init__(self, message=None):
+        if message is None:
+            message = ("Type is not supported. Provide a binary, normalized, or bounded "
+                       "continuous vector.")
+        super().__init__(message)
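A minimal sketch of raising and catching the two exceptions touched by this diff; the argument values are invented:

    from aisp.exceptions import FeatureDimensionMismatch, UnsupportedTypeError

    try:
        raise FeatureDimensionMismatch(expected=4, received=3, variable_name="X")
    except FeatureDimensionMismatch as err:
        print(err)  # names the variable and the expected/received feature counts

    try:
        raise UnsupportedTypeError()
    except UnsupportedTypeError as err:
        print(err)  # default message asks for a binary, normalized, or bounded vector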
aisp/nsa/_base.py
CHANGED
@@ -20,8 +20,8 @@ class BaseNSA(BaseClassifier, ABC):
 
     @staticmethod
     def _check_and_raise_exceptions_fit(
-        X: npt.NDArray
-        y: npt.NDArray
+        X: npt.NDArray,
+        y: npt.NDArray,
         _class_: Literal["RNSA", "BNSA"] = "RNSA",
     ) -> None:
         """Verify fit function parameters.
@@ -67,7 +67,7 @@ class BaseNSA(BaseClassifier, ABC):
 
     @staticmethod
     def _check_and_raise_exceptions_predict(
-        X: npt.NDArray
+        X: npt.NDArray,
         expected: int = 0,
         _class_: Literal["RNSA", "BNSA"] = "RNSA",
     ) -> None:
aisp/nsa/_negative_selection.py
CHANGED
@@ -1,6 +1,6 @@
 """Negative Selection Algorithm."""
 
-from typing import Dict, Literal, Optional, Union
+from typing import Any, Dict, Literal, Optional, Union
 from tqdm import tqdm
 
 import numpy as np
@@ -90,12 +90,12 @@ class RNSA(BaseNSA):
         k: int = 1,
         metric: Literal["manhattan", "minkowski", "euclidean"] = "euclidean",
         max_discards: int = 1000,
-        seed: int = None,
+        seed: Optional[int] = None,
         algorithm: Literal["default-NSA", "V-detector"] = "default-NSA",
-        **kwargs:
+        **kwargs: Any,
     ):
-        self.metric = sanitize_choice(metric, ["manhattan", "minkowski"], "euclidean")
-        self.seed = sanitize_seed(seed)
+        self.metric: str = sanitize_choice(metric, ["manhattan", "minkowski"], "euclidean")
+        self.seed: Optional[int] = sanitize_seed(seed)
         if self.seed is not None:
             np.random.seed(seed)
         self.k: int = sanitize_param(k, 1, lambda x: x > 1)
@@ -108,20 +108,20 @@ class RNSA(BaseNSA):
         self.max_discards: int = sanitize_param(max_discards, 1000, lambda x: x > 0)
 
         # Retrieves the variables from kwargs.
-        self.p:
-        self.cell_bounds: bool = kwargs.get("cell_bounds", False)
-        self.non_self_label: str = kwargs.get("non_self_label", "non-self")
+        self.p: np.float64 = np.float64(kwargs.get("p", 2))
+        self.cell_bounds: bool = bool(kwargs.get("cell_bounds", False))
+        self.non_self_label: str = str(kwargs.get("non_self_label", "non-self"))
 
         # Initializes the other class variables as None.
         self._detectors: Union[dict, None] = None
-        self.classes: npt.NDArray =
+        self.classes: Union[npt.NDArray, list] = []
 
     @property
-    def detectors(self) -> Dict[str, list[Detector]]:
+    def detectors(self) -> Optional[Dict[str, list[Detector]]]:
         """Returns the trained detectors, organized by class."""
         return self._detectors
 
-    def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True):
+    def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True) -> "RNSA":
         """
         Perform training according to X and y, using the negative selection method (NegativeSelect).
 
@@ -170,7 +170,7 @@ class RNSA(BaseNSA):
             discard_count = 0
             x_class = X[sample_index[_class_]]
             # Indicating which class the algorithm is currently processing for the progress bar.
-            if verbose:
+            if verbose and progress is not None:
                 progress.set_description_str(
                     f"Generating the detectors for the {_class_} class:"
                 )
@@ -183,11 +183,12 @@ class RNSA(BaseNSA):
                 # If the detector is valid, add it to the list of valid detectors.
                 if valid_detector is not False:
                     discard_count = 0
-
-                    valid_detector[1]
-
+                    if self.algorithm == "V-detector" and isinstance(valid_detector, tuple):
+                        radius = valid_detector[1]
+                    else:
+                        radius = None
                     valid_detectors_set.append(Detector(vector_x, radius))
-                    if verbose:
+                    if verbose and progress is not None:
                         progress.update(1)
                 else:
                     discard_count += 1
@@ -197,7 +198,7 @@ class RNSA(BaseNSA):
             # Add detectors, with classes as keys in the dictionary.
            list_detectors_by_class[_class_] = valid_detectors_set
         # Notify completion of detector generation for the classes.
-        if verbose:
+        if verbose and progress is not None:
            progress.set_description(
                f"\033[92m✔ Non-self detectors for classes ({', '.join(map(str, self.classes))}) "
                f"successfully generated\033[0m"
@@ -258,9 +259,7 @@ class RNSA(BaseNSA):
         elif not class_found:
             average_distance: dict = {}
             for _class_ in self.classes:
-                detectores =
-                    map(lambda x: x.position, self._detectors[_class_])
-                )
+                detectores = [x.position for x in self._detectors[_class_]]
                 average_distance[_class_] = np.average(
                     [self.__distance(detector, line) for detector in detectores]
                 )
@@ -291,17 +290,17 @@ class RNSA(BaseNSA):
         # If self.k > 1, uses the k nearest neighbors (kNN); otherwise, checks the detector
         # without considering kNN.
         if self.k > 1:
-            knn_list = []
+            knn_list: list = []
             for x in x_class:
                 # Calculates the distance between the two vectors and adds it to the kNN list if
                 # the distance is smaller than the largest distance in the list.
-
+                self.__compare_knearest_neighbors_list(
                     knn_list, self.__distance(x, vector_x)
                 )
             # If the average of the distances in the kNN list is less than the radius, returns True.
             distance_mean = np.mean(knn_list)
             if self.algorithm == "V-detector":
-                return self.__detector_is_valid_to_vdetector(distance_mean, vector_x)
+                return self.__detector_is_valid_to_vdetector(float(distance_mean), vector_x)
            if distance_mean > (self.r + self.r_s):
                 return True
         else:
@@ -323,8 +322,8 @@ class RNSA(BaseNSA):
         return False  # Detector is not valid!
 
     def __compare_knearest_neighbors_list(
-        self, knn:
-    ) ->
+        self, knn: list, distance: float
+    ) -> None:
         """
         Compare the k-nearest neighbor distance at position k=1 in the list knn.
 
@@ -336,17 +335,11 @@ class RNSA(BaseNSA):
            List of k-nearest neighbor distances.
         distance : float
            Distance to check.
-
-        Returns
-        -------
-        knn : npt.NDArray
-            Updated and sorted nearest neighbor list.
         """
         # If the number of distances in kNN is less than k, adds the distance.
         if len(knn) < self.k:
-            knn
+            knn.append(distance)
             knn.sort()
-            return knn
 
         # Otherwise, add the distance if the new distance is smaller than the largest
         # distance in the list.
@@ -354,7 +347,6 @@ class RNSA(BaseNSA):
             knn[self.k - 1] = distance
             knn.sort()
 
-        return knn
 
     def __compare_sample_to_detectors(self, line: npt.NDArray) -> Optional[str]:
         """
@@ -371,6 +363,9 @@ class RNSA(BaseNSA):
         Returns the predicted class with the detectors or None if the sample does not qualify
         for any class.
         """
+        if self._detectors is None:
+            return None
+
         # List to store the classes and the average distance between the detectors and the sample.
         possible_classes = []
         for _class_ in self.classes:
@@ -491,7 +486,7 @@ class BNSA(BaseNSA):
         N: int = 100,
         aff_thresh: float = 0.1,
         max_discards: int = 1000,
-        seed: int = None,
+        seed: Optional[int] = None,
         no_label_sample_selection: Literal[
             "max_average_difference", "max_nearest_difference"
         ] = "max_average_difference",
@@ -500,27 +495,27 @@ class BNSA(BaseNSA):
         self.aff_thresh: float = sanitize_param(aff_thresh, 0.1, lambda x: 0 < x < 1)
         self.max_discards: float = sanitize_param(max_discards, 1000, lambda x: x > 0)
 
-        self.seed = sanitize_seed(seed)
+        self.seed: Optional[int] = sanitize_seed(seed)
 
         if self.seed is not None:
             np.random.seed(seed)
 
-        self.no_label_sample_selection:
+        self.no_label_sample_selection: str = sanitize_param(
             no_label_sample_selection,
             "max_average_difference",
             lambda x: x == "nearest_difference",
         )
 
-        self.classes: npt.NDArray =
+        self.classes: Union[npt.NDArray, list] = []
         self._detectors: Optional[dict] = None
-        self._detectors_stack: npt.NDArray = None
+        self._detectors_stack: Optional[npt.NDArray] = None
 
     @property
-    def detectors(self) -> Dict[str, npt.NDArray[np.bool_]]:
+    def detectors(self) -> Optional[Dict[str, npt.NDArray[np.bool_]]]:
         """Returns the trained detectors, organized by class."""
         return self._detectors
 
-    def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True):
+    def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True) -> "BNSA":
         """Training according to X and y, using the negative selection method.
 
         Parameters
@@ -539,7 +534,7 @@ class BNSA(BaseNSA):
             Returns the instance itself.
         """
         super()._check_and_raise_exceptions_fit(X, y, "BNSA")
-
+        progress = None
         # Converts the entire array X to boolean
         X = X.astype(np.bool_)
 
@@ -562,7 +557,7 @@ class BNSA(BaseNSA):
             valid_detectors_set: list = []
             discard_count: int = 0
             # Updating the progress bar with the current class the algorithm is processing.
-            if verbose:
+            if verbose and progress is not None:
                 progress.set_description_str(
                     f"Generating the detectors for the {_class_} class:"
                 )
@@ -574,7 +569,7 @@ class BNSA(BaseNSA):
                 if check_detector_bnsa_validity(x_class, vector_x, self.aff_thresh):
                     discard_count = 0
                     valid_detectors_set.append(vector_x)
-                    if verbose:
+                    if verbose and progress is not None:
                         progress.update(1)
                 else:
                     discard_count += 1
@@ -585,7 +580,7 @@ class BNSA(BaseNSA):
             list_detectors_by_class[_class_] = np.array(valid_detectors_set)
 
         # Notify the completion of detector generation for the classes.
-        if verbose:
+        if verbose and progress is not None:
             progress.set_description(
                 f"\033[92m✔ Non-self detectors for classes ({', '.join(map(str, self.classes))}) "
                 f"successfully generated\033[0m"
@@ -613,7 +608,7 @@ class BNSA(BaseNSA):
             ``X``. Returns ``None`` if there are no detectors for the prediction.
         """
         # If there are no detectors, returns None.
-        if self._detectors is None:
+        if self._detectors is None or self._detectors_stack is None:
             return None
 
         super()._check_and_raise_exceptions_predict(
@@ -664,6 +659,9 @@ class BNSA(BaseNSA):
         c : list
            List of predictions to be updated with the new classification.
         """
+        if self._detectors is None:
+            raise ValueError("Detectors is not initialized.")
+
        class_differences: dict = {}
         for _class_ in self.classes:
             distances = np.sum(line != self._detectors[_class_]) / self.N
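For context, a sketch of the call pattern these annotations now support (chaining `fit(...).predict(...)`); the toy data is invented and assumes `RNSA` and `BNSA` are exported from `aisp.nsa`:

    import numpy as np
    from aisp.nsa import BNSA, RNSA

    np.random.seed(1)
    X = np.random.random((30, 5))                     # normalized continuous samples
    y = np.array(["a"] * 15 + ["b"] * 15)

    rnsa = RNSA(k=1, seed=42)
    pred = rnsa.fit(X, y, verbose=False).predict(X)   # chaining enabled by `-> "RNSA"`

    Xb = np.random.randint(0, 2, size=(30, 10))       # binary samples for BNSA
    bnsa = BNSA(N=50, aff_thresh=0.2, seed=42)
    pred_b = bnsa.fit(Xb, y, verbose=False).predict(Xb)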
aisp/utils/types.py
ADDED
@@ -0,0 +1,31 @@
+"""
+Defines type aliases used throughout the project to improve readability.
+
+Type Aliases
+------------
+FeatureType : Literal["binary-features", "continuous-features", "ranged-features"]
+    Specifies the type of features in the input data. Can be one of:
+    - "binary-features": Features with binary values (e.g., 0 or 1).
+    - "continuous-features": Features with continuous numeric values.
+    - "ranged-features": Features represented by ranges or intervals.
+
+MetricType : Literal["manhattan", "minkowski", "euclidean"]
+    Specifies the distance metric to use for calculations. Possible values:
+    - "manhattan": The calculation of the distance is given by the expression:
+      (|X₁ – Y₁| + |X₂ – Y₂| + ... + |Xₙ – Yₙ|).
+    - "minkowski": The calculation of the distance is given by the expression:
+      (|X₁ – Y₁|ᵖ + |X₂ – Y₂|ᵖ + ... + |Xₙ – Yₙ|ᵖ)¹/ₚ.
+    - "euclidean": The calculation of the distance is given by the expression:
+      √((X₁ – Y₁)² + (X₂ – Y₂)² + ... + (Xₙ – Yₙ)²).
+"""
+
+
+from typing import Literal, TypeAlias
+
+
+FeatureType: TypeAlias = Literal[
+    "binary-features",
+    "continuous-features",
+    "ranged-features"
+]
+MetricType: TypeAlias = Literal["manhattan", "minkowski", "euclidean"]
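A sketch of how these aliases are meant to be consumed; the `describe` function below is hypothetical, not part of the package. A static type checker narrows the arguments to the literal strings:

    from aisp.utils.types import FeatureType, MetricType

    def describe(feature_type: FeatureType, metric: MetricType = "euclidean") -> str:
        # A type checker rejects values outside the Literal sets,
        # e.g. describe("discrete-features", "cosine") fails to type-check.
        return f"{feature_type} compared with the {metric} distance"

    print(describe("binary-features", "manhattan"))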
aisp/utils/validation.py
ADDED
@@ -0,0 +1,47 @@
+"""Contains functions responsible for validating data types."""
+
+import numpy as np
+import numpy.typing as npt
+
+from .types import FeatureType
+from ..exceptions import UnsupportedTypeError
+
+
+def detect_vector_data_type(
+    vector: npt.NDArray
+) -> FeatureType:
+    """
+    Detect the type of data in a vector.
+
+    The function detects whether the vector contains data of type:
+    - "binary": binary data (boolean True/False or integer 0/1)
+    - "continuous": continuous data between 0.0 and 1.0 (float)
+    - "ranged": numerical data with values outside the normalized range (float)
+
+    Parameters
+    ----------
+    vector : npt.NDArray
+        An array containing the data to be classified.
+
+    Returns
+    -------
+    Literal["binary-features", "continuous-features", "ranged-features"]
+        The classified data type of the vector.
+
+    Raises
+    ------
+    UnsupportedTypeError
+        If the data type of the vector is not supported by the function.
+    """
+    if vector.dtype == np.bool_:
+        return "binary-features"
+
+    if np.issubdtype(vector.dtype, np.integer) and np.isin(vector, [0, 1]).all():
+        return "binary-features"
+
+    if np.issubdtype(vector.dtype, np.floating):
+        if np.all(vector >= 0.0) and np.all(vector <= 1.0):
+            return "continuous-features"
+        return "ranged-features"
+
+    raise UnsupportedTypeError()
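A short sketch of the detection rules above, with invented arrays:

    import numpy as np
    from aisp.utils.validation import detect_vector_data_type

    print(detect_vector_data_type(np.array([0, 1, 1, 0])))       # binary-features
    print(detect_vector_data_type(np.array([0.2, 0.7, 1.0])))    # continuous-features
    print(detect_vector_data_type(np.array([3.5, -2.0, 10.0])))  # ranged-features
    # A non-boolean, non-integer, non-float dtype (e.g. complex) matches no branch
    # and raises UnsupportedTypeError.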
{aisp-0.1.42.dist-info → aisp-0.2.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aisp
-Version: 0.1.42
+Version: 0.2.1
 Summary: Package with techniques of artificial immune systems.
 Author-email: João Paulo da Silva Barros <jpsilvabarr@gmail.com>
 Maintainer-email: Alison Zille Lopes <alisonzille@gmail.com>
@@ -26,6 +26,11 @@ Requires-Dist: numpy>=1.22.4
 Requires-Dist: numba>=0.59.0
 Requires-Dist: scipy>=1.8.1
 Requires-Dist: tqdm>=4.64.1
+Provides-Extra: dev
+Requires-Dist: build>=1.2.2.post1; extra == "dev"
+Requires-Dist: ipykernel>=6.29.5; extra == "dev"
+Requires-Dist: twine>=5.1.1; extra == "dev"
+Requires-Dist: pytest>=8.3.5; extra == "dev"
 Dynamic: license-file
 
 <div align = center>
@@ -79,7 +84,8 @@ Artificial Immune Systems (AIS) are inspired by the vertebrate immune system, cr
 ##### Algorithms implemented:
 
 > - [x] [**Negative Selection.**](https://ais-package.github.io/docs/aisp-techniques/Negative%20Selection/)
-> - [
+> - [x] **Clonal Selection Algorithms.**
+>    * [AIRS - Artificial Immune Recognition System](https://ais-package.github.io/docs/aisp-techniques/Clonal%20Selection%20Algorithms/)
 > - [ ] *Dendritic Cells.*
 > - [ ] *Immune Network Theory.*
 
@@ -126,22 +132,14 @@ pip install aisp
 
 ---
 
-
+Explore the example notebooks available in the [AIS-Package/aisp repository](https://github.com/AIS-Package/aisp/tree/main/examples).
+These notebooks demonstrate how to utilize the package's functionalities in various scenarios, including applications of the RNSA,
+BNSA and AIRS algorithms on datasets such as Iris, Geyser, and Mushrooms.
 
-
+You can run the notebooks directly in your browser without any local installation using Binder:
 
-
-
-
-##### **Negative Selection:**
-
-+ **RNSA** Application of negative selection techniques for classification using the Iris family flower database and Old Faithful Geyser:
-    + [iris_dataBase_example](https://github.com/AIS-Package/aisp/blob/main/examples/RNSA/iris_dataBase_example_en.ipynb)
-    + [geyser_dataBase_example](https://github.com/AIS-Package/aisp/blob/main/examples/RNSA/geyser_dataBase_example_en.ipynb)
-+ **BNSA**
-    + [mushrooms_dataBase_example](https://github.com/AIS-Package/aisp/blob/main/examples/BNSA/mushrooms_dataBase_example_en.ipynb)
-
----
+[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/AIS-Package/aisp/HEAD?labpath=%2Fexamples)
 
+> 💡 **Tip**: Binder may take a few minutes to load the environment, especially on the first launch.
 </section>
 </section>
aisp-0.2.1.dist-info/RECORD
ADDED
@@ -0,0 +1,25 @@
+aisp/__init__.py,sha256=N5aAyup46_tqU9cXfYfGuR3bdfAjcvaPc1xwFdGdD7A,112
+aisp/exceptions.py,sha256=I9JaQx6p8Jo7qjwwcrqnuewQgyBdUnOSSZofPoBeDNE,1954
+aisp/base/__init__.py,sha256=k2Ww9hej_32ekYhhCiYGEMLgOmDKwRt261HZ8rEurwA,102
+aisp/base/_classifier.py,sha256=Ud8VLE7vNh1ddpNNg0RVET2RXCd7kvzvfvNKHKNn_GM,3734
+aisp/base/mutation.py,sha256=A_AlGp8S4ooFEMW3Jgv0n0Y6tbhfusaMMWFsoH4HmD8,4762
+aisp/csa/__init__.py,sha256=cJSKkbvNTpR_CKCL--h99fNPiMf3fJ73gFnZRq7uyVM,355
+aisp/csa/_ai_immune_recognition_sys.py,sha256=_XqTHjqEO6sGZiIRlNNLe6Lz2PDFfDCtsbpucClvYmA,18878
+aisp/csa/_base.py,sha256=jR1IIhGINn7DLo8V5iJinDn-wW-t6etcE39bAZnQylw,3595
+aisp/csa/_cell.py,sha256=GUxnzvPyIbBm1YYkMhSx0tcV_oyDhJ7wAo5gtr_1CoY,1845
+aisp/nsa/__init__.py,sha256=3cXuBmO-_Dp3-8ZG3Eu8e_bD1JDb-RH4Wu0UDNVD1bs,385
+aisp/nsa/_base.py,sha256=3YKlZzA3yhP2uQHfhyKswbHUutlxkOR4wn6N10nSO-w,4119
+aisp/nsa/_negative_selection.py,sha256=aMdbIrd4TdPxaAkHHY-HbbM5kd5f81HbE3DyB73ttX4,28467
+aisp/nsa/_ns_core.py,sha256=SXkZL-p2VQygU4Pf6J5AP_yPzU4cR6aU6wx-e_vlm-c,5021
+aisp/utils/__init__.py,sha256=RzpKhkg8nCZi4G0C4il97f3ESYs7Bbxq6EjTeOQQUGk,195
+aisp/utils/_multiclass.py,sha256=nWd58ayVfxgdopBQc9b_xywkolJ2fGW3AN-JoD2A9Fw,1134
+aisp/utils/distance.py,sha256=pIt76OUiwCry6eNEuWLYvUiW4KkeU6egjjnnmroFet8,6556
+aisp/utils/metrics.py,sha256=zDAScDbHRnfu24alRcZ6fEIUaWNoCD-QCtOCFBWPPo8,1277
+aisp/utils/sanitizers.py,sha256=u1GizdJ-RKfPWJLnuFiM09lpItZMhDR_EvK8YdVHwDk,1858
+aisp/utils/types.py,sha256=KELzr1kSBT7hHdsABoIS1xmEBGj6gRSH5A5YNG36I_c,1324
+aisp/utils/validation.py,sha256=ya7Y_6Lv7L6LAHC11EAfZRqqneCsOqrjG8i2EQFZcpA,1418
+aisp-0.2.1.dist-info/licenses/LICENSE,sha256=fTqV5eBpeAZO0_jit8j4Ref9ikBSlHJ8xwj5TLg7gFk,7817
+aisp-0.2.1.dist-info/METADATA,sha256=bhd0eOBVOuNN8gZ-jN3L4QWV17Qa0R2Kb-syKX5PK1U,4844
+aisp-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+aisp-0.2.1.dist-info/top_level.txt,sha256=Q5aJi_rAVT5UNS1As0ZafoyS5dwNibnoyOYV7RWUB9s,5
+aisp-0.2.1.dist-info/RECORD,,
aisp-0.1.42.dist-info/RECORD
DELETED
@@ -1,18 +0,0 @@
-aisp/__init__.py,sha256=N5aAyup46_tqU9cXfYfGuR3bdfAjcvaPc1xwFdGdD7A,112
-aisp/exceptions.py,sha256=M2H_oM-ccIkDGpeFA3CyklZlgMcjTVvOCTGLU2sxFi8,1447
-aisp/base/__init__.py,sha256=k2Ww9hej_32ekYhhCiYGEMLgOmDKwRt261HZ8rEurwA,102
-aisp/base/_classifier.py,sha256=_HJiL1fCNqoB5KlNUN5pH9Yuu_btOze39h0SdnBw7ug,3672
-aisp/nsa/__init__.py,sha256=3cXuBmO-_Dp3-8ZG3Eu8e_bD1JDb-RH4Wu0UDNVD1bs,385
-aisp/nsa/_base.py,sha256=D_N-VIESvGFhdf_A2NETV-JaZJ6ISankrbRzWXSMiXM,4140
-aisp/nsa/_negative_selection.py,sha256=-hqMspYvtPAb38qV1_NF5HmDCOGDWGL89BZ3M4eHiao,28141
-aisp/nsa/_ns_core.py,sha256=SXkZL-p2VQygU4Pf6J5AP_yPzU4cR6aU6wx-e_vlm-c,5021
-aisp/utils/__init__.py,sha256=RzpKhkg8nCZi4G0C4il97f3ESYs7Bbxq6EjTeOQQUGk,195
-aisp/utils/_multiclass.py,sha256=nWd58ayVfxgdopBQc9b_xywkolJ2fGW3AN-JoD2A9Fw,1134
-aisp/utils/distance.py,sha256=pIt76OUiwCry6eNEuWLYvUiW4KkeU6egjjnnmroFet8,6556
-aisp/utils/metrics.py,sha256=zDAScDbHRnfu24alRcZ6fEIUaWNoCD-QCtOCFBWPPo8,1277
-aisp/utils/sanitizers.py,sha256=u1GizdJ-RKfPWJLnuFiM09lpItZMhDR_EvK8YdVHwDk,1858
-aisp-0.1.42.dist-info/licenses/LICENSE,sha256=fTqV5eBpeAZO0_jit8j4Ref9ikBSlHJ8xwj5TLg7gFk,7817
-aisp-0.1.42.dist-info/METADATA,sha256=rsVoFnO5H9UV643wtHx7vvNWBxxg6QPcgjrbfQGOF0Y,4818
-aisp-0.1.42.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
-aisp-0.1.42.dist-info/top_level.txt,sha256=Q5aJi_rAVT5UNS1As0ZafoyS5dwNibnoyOYV7RWUB9s,5
-aisp-0.1.42.dist-info/RECORD,,
{aisp-0.1.42.dist-info → aisp-0.2.1.dist-info}/licenses/LICENSE
File without changes
{aisp-0.1.42.dist-info → aisp-0.2.1.dist-info}/top_level.txt
File without changes