aisp 0.1.42__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aisp/base/mutation.py +86 -0
- aisp/csa/__init__.py +9 -0
- aisp/csa/_ai_immune_recognition_sys.py +506 -0
- aisp/csa/_base.py +119 -0
- aisp/csa/_cell.py +47 -0
- {aisp-0.1.42.dist-info → aisp-0.2.0.dist-info}/METADATA +9 -16
- {aisp-0.1.42.dist-info → aisp-0.2.0.dist-info}/RECORD +10 -5
- {aisp-0.1.42.dist-info → aisp-0.2.0.dist-info}/WHEEL +0 -0
- {aisp-0.1.42.dist-info → aisp-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {aisp-0.1.42.dist-info → aisp-0.2.0.dist-info}/top_level.txt +0 -0
aisp/base/mutation.py
ADDED
@@ -0,0 +1,86 @@

```python
"""
The functions in this module use Numba decorators for Just-In-Time compilation.

Contains functions that generate sets of mutated clones from continuous or binary vectors,
simulating the clonal expansion process in artificial immune systems.
"""
import numpy as np
import numpy.typing as npt
from numba import njit, types


@njit([(types.float64[:], types.int64)], cache=True)
def clone_and_mutate_continuous(
    vector: npt.NDArray[np.float64],
    n: int
) -> npt.NDArray[np.float64]:
    """
    Generate a set of mutated clones from a cell represented by a continuous vector.

    This function creates `n` clones of the input vector and applies random mutations to each of
    them, simulating the process of clonal expansion in artificial immune systems. Each clone
    will have a random number of mutations applied in distinct positions of the original vector.

    Parameters
    ----------
    vector : npt.NDArray[np.float64]
        The original immune cell with continuous values to be cloned and mutated.
    n : int
        The number of mutated clones to be generated.

    Returns
    -------
    clone_set : npt.NDArray
        An Array(n, len(vector)) containing the `n` mutated clones of the original vector.
    """
    n_features = vector.shape[0]
    clone_set = np.empty((n, n_features), dtype=np.float64)
    for i in range(n):
        n_mutations = np.random.randint(1, n_features)
        clone = vector.copy()
        position_mutations = np.random.permutation(n_features)[:n_mutations]
        for j in range(n_mutations):
            idx = position_mutations[j]
            clone[idx] = np.float64(np.random.random())
        clone_set[i] = clone

    return clone_set


@njit([(types.boolean[:], types.int64)], cache=True)
def clone_and_mutate_binary(
    vector: npt.NDArray[np.bool_],
    n: int
) -> npt.NDArray[np.bool_]:
    """
    Generate a set of mutated clones from a cell represented by a binary vector.

    This function creates `n` clones of the input vector and applies random mutations to each of
    them, changing some bits randomly. The process simulates clonal expansion in artificial
    immune systems with discrete representations.

    Parameters
    ----------
    vector : npt.NDArray[np.bool_]
        The original immune cell with binary values to be cloned and mutated.
    n : int
        The number of mutated clones to be generated.

    Returns
    -------
    clone_set : npt.NDArray[np.bool_]
        An Array(n, len(vector)) containing the `n` mutated clones of the original vector.
    """
    n_features = vector.shape[0]
    clone_set = np.empty((n, n_features), dtype=np.bool_)
    for i in range(n):
        n_mutations = np.random.randint(1, n_features)
        clone = vector.copy()
        position_mutations = np.random.permutation(n_features)[:n_mutations]
        for j in range(n_mutations):
            idx = position_mutations[j]
            clone[idx] = np.bool_(np.random.randint(0, 2))
        clone_set[i] = clone

    return clone_set
```
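For readers who want to try the new mutation helpers in isolation, here is a minimal sketch (not part of the diff; the toy vectors and clone counts are illustrative). Note that `np.random.randint(1, n_features)` excludes the upper bound, so each clone mutates between 1 and `n_features - 1` positions.

```python
import numpy as np

from aisp.base.mutation import clone_and_mutate_binary, clone_and_mutate_continuous

# Continuous cell: values are expected to be normalized to [0, 1].
parent = np.random.random(5)
clones = clone_and_mutate_continuous(parent, 8)
print(clones.shape)  # (8, 5): eight mutated copies of the parent cell

# Binary cell: the Numba signature requires a boolean array.
bits = np.random.randint(0, 2, 6).astype(np.bool_)
bit_clones = clone_and_mutate_binary(bits, 4)
print(bit_clones.shape)  # (4, 6)
```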
aisp/csa/__init__.py
ADDED
@@ -0,0 +1,9 @@

```python
"""Module (CSA) Clonal Selection Algorithm.

CSAs are inspired by the process of antibody proliferation upon detecting an antigen, during which
the generated antibodies undergo mutations in an attempt to enhance pathogen recognition.
"""
from ._ai_immune_recognition_sys import AIRS

__author__ = 'João Paulo da Silva Barros'
__all__ = ['AIRS']
```
aisp/csa/_ai_immune_recognition_sys.py
ADDED
@@ -0,0 +1,506 @@

```python
"""Artificial Immune Recognition System (AIRS)."""

import random
from collections import Counter
from heapq import nlargest
from operator import attrgetter
from typing import List, Literal, Optional, Dict

import numpy as np
import numpy.typing as npt
from scipy.spatial.distance import pdist
from tqdm import tqdm

from ._cell import Cell
from ..utils.sanitizers import sanitize_param, sanitize_seed, sanitize_choice
from ..utils.distance import hamming, compute_metric_distance, get_metric_code
from ._base import BaseAIRS


class _ARB(Cell):
    """ARB (Artificial recognition ball).

    Individual from the set of recognizing cells (ARB); inherits characteristics from a B-cell,
    adding resource consumption.

    Parameters
    ----------
    vector : npt.NDArray
        A vector of cell features.
    stimulation : Optional[float], default=None
        The rate at which the cell stimulates antigens.
    """

    def __init__(
        self,
        vector: npt.NDArray,
        stimulation: Optional[float] = None
    ) -> None:
        super().__init__(vector)
        self.resource: float = 0.0
        if stimulation is not None:
            self.stimulation: float = stimulation

    def consume_resource(self, n_resource: float, amplified: float = 1) -> float:
        """
        Update the amount of resources available for an ARB after consumption.

        This function consumes the resources and returns the remaining amount of resources after
        consumption.

        Parameters
        ----------
        n_resource : float
            Amount of resources.
        amplified : float
            Amplifier for the resource consumption by the cell. It is multiplied by the cell's
            stimulus. The default value is 1.

        Returns
        -------
        n_resource : float
            The remaining amount of resources after consumption.
        """
        consumption = self.stimulation * amplified
        n_resource -= consumption
        if n_resource < 0:
            return 0

        self.resource = consumption
        return n_resource

    def to_cell(self) -> Cell:
        """Convert this _ARB into a pure Cell object."""
        return Cell(self.vector)


class AIRS(BaseAIRS):
    """Artificial Immune Recognition System (AIRS).

    The Artificial Immune Recognition System (AIRS) is a classification algorithm inspired by the
    clonal selection process of the biological immune system. This implementation is based on the
    simplified AIRS2 version described in [1]_. The algorithm has been adapted to support both
    real-valued (continuous) and binary feature datasets.

    Parameters
    ----------
    n_resources : float, default=10
        Total amount of available resources.
    rate_clonal : int, default=10
        Maximum number of possible clones of a class. This quantity is multiplied by
        (cell_stimulus * rate_hypermutation) to define the number of clones.
    rate_mc_init : float, default=0.2
        Percentage of samples used to initialize memory cells.
    rate_hypermutation : float, default=0.75
        The rate of mutated clones derived from rate_clonal as a scalar factor.
    affinity_threshold_scalar : float, default=0.75
        Normalized affinity threshold.
    k : int, default=3
        The number of K nearest neighbors that will be used to choose a label in the prediction.
    max_iters : int, default=100
        Maximum number of iterations in the refinement process of the ARB set exposed to aᵢ.
    resource_amplified : float, default=1.0
        Resource consumption amplifier; it is multiplied by the cell's stimulation when
        subtracting resources. Defaults to 1.0 (no amplification).
    metric : Literal["manhattan", "minkowski", "euclidean"], default="euclidean"
        Way to calculate the distance between the detector and the sample:

        * ``'euclidean'`` ➜ The calculation of the distance is given by the expression:
          √((x₁ – y₁)² + (x₂ – y₂)² + ... + (xₙ – yₙ)²).

        * ``'minkowski'`` ➜ The calculation of the distance is given by the expression:
          (|x₁ – y₁|ᵖ + |x₂ – y₂|ᵖ + ... + |xₙ – yₙ|ᵖ)^(1/p).

        * ``'manhattan'`` ➜ The calculation of the distance is given by the expression:
          (|x₁ – y₁| + |x₂ – y₂| + ... + |xₙ – yₙ|).

    algorithm : Literal["continuous-features", "binary-features"], default="continuous-features"
        Specifies the type of algorithm to use based on the nature of the input features:

        * ``continuous-features``: selects an algorithm designed for continuous data, which should
          be normalized within the range [0, 1].

        * ``binary-features``: selects an algorithm specialized for handling binary variables.

    seed : int
        Seed for the random generation of detector values. Defaults to None.

    **kwargs
        p : float
            This parameter stores the value of ``p`` used in the Minkowski distance. The default
            is ``2``, which represents normalized Euclidean distance.
            Different values of p lead to different variants of the Minkowski distance.

    Notes
    -----
    This implementation is inspired by AIRS2, a simplified version of the original AIRS algorithm,
    introducing adaptations to handle continuous and binary datasets.

    Based on Algorithm 16.5 from Brabazon et al. [1]_.

    Related and noteworthy works: access here [2]_.

    References
    ----------
    .. [1] Brabazon, A., O’Neill, M., & McGarraghy, S. (2015). Natural Computing Algorithms. In
       Natural Computing Series. Springer Berlin Heidelberg.
       https://doi.org/10.1007/978-3-662-43631-8

    .. [2] AZZOUG, Aghiles. Artificial Immune Recognition System V2.
       Available at: https://github.com/AghilesAzzoug/Artificial-Immune-System
    """

    def __init__(
        self,
        n_resources: float = 10,
        rate_clonal: int = 10,
        rate_mc_init: float = 0.2,
        rate_hypermutation: float = 0.75,
        affinity_threshold_scalar: float = 0.75,
        k: int = 3,
        max_iters: int = 100,
        resource_amplified: float = 1.0,
        metric: Literal["manhattan", "minkowski", "euclidean"] = "euclidean",
        algorithm: Literal[
            "continuous-features", "binary-features"
        ] = "continuous-features",
        seed: int = None,
        **kwargs,
    ) -> None:
        self.n_resources: float = sanitize_param(n_resources, 10, lambda x: x >= 1)
        self.rate_mc_init: float = sanitize_param(
            rate_mc_init, 0.2, lambda x: 0 < x <= 1
        )
        self.rate_clonal: int = sanitize_param(rate_clonal, 10, lambda x: x > 0)
        self.rate_hypermutation: float = sanitize_param(
            rate_hypermutation, 0.75, lambda x: x > 0
        )
        self.affinity_threshold_scalar: float = sanitize_param(
            affinity_threshold_scalar, 0.75, lambda x: x > 0
        )
        self.resource_amplified: float = sanitize_param(
            resource_amplified, 1, lambda x: x > 1
        )
        self.k: int = sanitize_param(k, 3, lambda x: x > 3)
        self.max_iters: int = sanitize_param(max_iters, 100, lambda x: x > 0)
        self.seed: int = sanitize_seed(seed)
        if self.seed is not None:
            np.random.seed(self.seed)

        self.algorithm: Literal["continuous-features", "binary-features"] = (
            sanitize_param(
                algorithm, "continuous-features", lambda x: x == "binary-features"
            )
        )

        if algorithm == "binary-features":
            self.metric: str = "hamming"
        else:
            self.metric: str = sanitize_choice(
                metric, ["manhattan", "minkowski"], "euclidean"
            )

        self.p: np.float64 = np.float64(kwargs.get("p", 2.0))

        self._cells_memory = None
        self.affinity_threshold = 0.0
        self.classes = None

    @property
    def cells_memory(self) -> Dict[str, list[Cell]]:
        """Returns the trained cells memory, organized by class."""
        return self._cells_memory

    def fit(self, X: npt.NDArray, y: npt.NDArray, verbose: bool = True):
        """
        Fit the model to the training data using the AIRS.

        The function ``fit(...)`` performs training according to ``X`` and ``y``, using the
        AIRS method.

        Parameters
        ----------
        X : npt.NDArray
            Training array, containing the samples and their characteristics,
            [``N samples`` (rows)][``N features`` (columns)].
        y : npt.NDArray
            Array of target classes of ``X`` with [``N samples`` (rows)].
        verbose : bool
            Feedback on the sample aᵢ for which the memory cells are being generated.

        Returns
        -------
        AIRS
            Returns the instance itself.
        """
        progress = None

        super()._check_and_raise_exceptions_fit(X, y, self.algorithm)

        if self.algorithm == "binary-features":
            X = X.astype(np.bool_)

        self.classes = np.unique(y)
        sample_index = self._slice_index_list_by_class(y)
        if verbose:
            progress = tqdm(
                total=len(y),
                postfix="\n",
                bar_format="{desc} ┇{bar}┇ {n}/{total} memory cells for each aᵢ",
            )
        pool_cells_classes = {}
        for _class_ in self.classes:
            if verbose:
                progress.set_description_str(
                    f"Generating the memory cells for the {_class_} class:"
                )

            x_class = X[sample_index[_class_]]
            # Calculating the similarity threshold between antigens
            self._cells_affinity_threshold(x_class)
            sufficiently_similar = (
                self.affinity_threshold * self.affinity_threshold_scalar
            )
            # Initialize memory cells for a class.
            pool_c: list[Cell] = self._init_memory_c(x_class)

            for ai in x_class:
                # Calculating the stimulation of memory cells with aᵢ and selecting the largest
                # stimulation from the memory set.
                c_match = None
                match_stimulation = -1
                for cell in pool_c:
                    stimulation = self._affinity(cell.vector, ai)
                    if stimulation > match_stimulation:
                        match_stimulation = stimulation
                        c_match = cell

                arb_list: list[_ARB] = [
                    _ARB(
                        vector=c_match.vector,
                        stimulation=match_stimulation
                    )
                ]

                set_clones: npt.NDArray = c_match.hyper_clonal_mutate(
                    int(self.rate_hypermutation * self.rate_clonal * match_stimulation),
                    self.algorithm
                )

                for clone in set_clones:
                    arb_list.append(
                        _ARB(
                            vector=clone,
                            stimulation=self._affinity(clone, ai),
                        )
                    )

                c_candidate = self._refinement_arb(ai, match_stimulation, arb_list)

                if c_candidate.stimulation > match_stimulation:
                    pool_c.append(c_candidate.to_cell())
                    if self._affinity(c_candidate.vector, c_match.vector) < sufficiently_similar:
                        pool_c.remove(c_match)

                if verbose:
                    progress.update(1)
            pool_cells_classes[_class_] = pool_c

        if verbose:
            progress.set_description(
                f"\033[92m✔ Set of memory cells for classes ({', '.join(map(str, self.classes))}) "
                f"successfully generated\033[0m"
            )
        self._cells_memory = pool_cells_classes
        return self

    def predict(self, X: npt.NDArray) -> Optional[npt.NDArray]:
        """
        Predict class labels based on the memory cells created during training.

        This method uses the trained memory cells to perform classification of the input data
        using the k-nearest neighbors approach.

        Parameters
        ----------
        X : npt.NDArray
            Array with input samples with [``N samples`` (rows)] and
            [``N characteristics`` (columns)].

        Returns
        -------
        C : npt.NDArray or None
            An ndarray of the form ``C`` [``N samples``], containing the predicted classes for
            ``X``, or ``None`` if there are no detectors for the prediction.
        """
        if self._cells_memory is None:
            return None

        super()._check_and_raise_exceptions_predict(
            X, len(self._cells_memory[self.classes[0]][0].vector), self.algorithm
        )

        c: list = []

        all_cells_memory = [
            (class_name, cell.vector)
            for class_name in self.classes
            for cell in self._cells_memory[class_name]
        ]

        for line in X:
            label_stim_list = [
                (class_name, self._affinity(memory, line))
                for class_name, memory in all_cells_memory
            ]
            # Create the list with the k nearest neighbors and select the class with the most votes
            k_nearest = nlargest(self.k, label_stim_list, key=lambda x: x[1])
            votes = Counter(label for label, _ in k_nearest)
            c.append(votes.most_common(1)[0][0])
        return np.array(c)

    def _refinement_arb(
        self, ai: npt.NDArray, c_match_stimulation: float, arb_list: List[_ARB]
    ) -> _ARB:
        """
        Refine the ARB set until the average stimulation exceeds the defined threshold.

        This method iteratively refines the ARB set by comparing the average stimulation
        against the `affinity_threshold_scalar`. Refinement continues through multiple iterations
        until the threshold is met or exceeded.

        Parameters
        ----------
        ai : npt.NDArray
            The current antigen.
        c_match_stimulation : float
            The highest stimulation relative to aᵢ.
        arb_list : List[_ARB]
            ARB set.

        Returns
        -------
        _ARB
            The cell with the highest ARB stimulation.

        Notes
        -----
        Based on Algorithm 16.6 from Brabazon et al. [1]_.

        References
        ----------
        .. [1] Brabazon, A., O’Neill, M., & McGarraghy, S. (2015).
           Natural Computing Algorithms. Natural Computing Series.
           Springer Berlin Heidelberg. https://doi.org/10.1007/978-3-662-43631-8
        """
        iters = 0
        while True:
            iters += 1
            arb_list.sort(key=attrgetter("stimulation"), reverse=True)
            resource = self.n_resources
            for arb in arb_list:
                resource = arb.consume_resource(
                    n_resource=resource, amplified=self.resource_amplified
                )
                if resource == 0:
                    break
            # remove cells without resources and calculate the average ARB stimulus.
            arb_list = [cell for cell in arb_list if cell.resource > 0]
            if not arb_list:
                break
            avg_stimulation = sum(item.stimulation for item in arb_list) / len(arb_list)

            if iters == self.max_iters or avg_stimulation > self.affinity_threshold:
                break

            # pick a random cell for mutations.
            random_index = random.randint(0, len(arb_list) - 1)
            clone_arb = arb_list[random_index].hyper_clonal_mutate(
                int(self.rate_clonal * c_match_stimulation),
                self.algorithm
            )

            arb_list = [
                _ARB(
                    vector=clone,
                    stimulation=self._affinity(clone, ai)
                )
                for clone in clone_arb
            ]

        return max(arb_list, key=attrgetter("stimulation"))

    def _cells_affinity_threshold(self, antigens_list: npt.NDArray):
        """
        Calculate the affinity threshold based on the average affinity between training instances.

        This function calculates the affinity threshold based on the average affinity between
        training instances, where aᵢ and aⱼ are a pair of antigens, and affinity
        is measured by distance (Euclidean, Manhattan, Minkowski, Hamming).
        Following the formula:

        > affinity_threshold = (Σᵢ₌₁ⁿ⁻¹ Σⱼ₌ᵢ₊₁ⁿ affinity(aᵢ, aⱼ)) / (n(n-1)/2)

        Parameters
        ----------
        antigens_list : npt.NDArray
            List of training antigens.
        """
        if self.algorithm == "binary-features":
            distances = pdist(antigens_list, metric="hamming")
        elif self.metric == "minkowski":
            distances = pdist(antigens_list, metric="minkowski", p=self.p)
        else:
            distances = pdist(antigens_list, metric=self.metric)
        n = antigens_list.shape[0]
        sum_affinity = np.sum(1.0 - (distances / (1.0 + distances)))
        self.affinity_threshold = 1.0 - (sum_affinity / ((n * (n - 1)) / 2))

    def _affinity(self, u: npt.NDArray, v: npt.NDArray) -> float:
        """
        Calculate the stimulus between two vectors using metrics.

        Parameters
        ----------
        u : npt.NDArray
            Coordinates of the first point.
        v : npt.NDArray
            Coordinates of the second point.

        Returns
        -------
        float
            The stimulus rate between the vectors.
        """
        distance: float
        if self.algorithm == "binary-features":
            distance = hamming(u, v)
        else:
            distance = compute_metric_distance(
                u, v, get_metric_code(self.metric), self.p
            )
        return 1 - (distance / (1 + distance))

    def _init_memory_c(self, antigens_list: npt.NDArray) -> List[Cell]:
        """
        Initialize memory cells by randomly selecting `rate_mc_init` antigens.

        Parameters
        ----------
        antigens_list : npt.NDArray
            List of training antigens.

        Returns
        -------
        List[Cell]
            List of initialized memories.
        """
        n = antigens_list.shape[0]
        n_cells = int(n * self.rate_mc_init)

        if n == 0 or n_cells == 0:
            return []

        permutation = np.random.permutation(n)
        selected = antigens_list[permutation[:n_cells]]
        return [Cell(ai) for ai in selected]
```
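Taken together, `fit` builds a per-class pool of memory cells and `predict` votes among the `k` most stimulated of them, where the stimulation of two vectors at distance `d` is `1 - d/(1 + d)`, so identical vectors score 1 and the score decays toward 0 as `d` grows. A minimal usage sketch follows, assuming continuous data normalized to [0, 1]; the toy dataset and parameter values are illustrative, not from the diff:

```python
import numpy as np

from aisp.csa import AIRS

rng = np.random.default_rng(0)
X = rng.random((60, 4))          # 60 samples, 4 continuous features in [0, 1]
y = (X[:, 0] > 0.5).astype(int)  # toy binary target

model = AIRS(k=3, max_iters=50, seed=123)
model.fit(X, y, verbose=False)
print(model.predict(X[:5]))      # labels chosen by k-NN vote over memory cells
```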
aisp/csa/_base.py
ADDED
@@ -0,0 +1,119 @@

```python
"""Base Class for Clonal Selection Algorithm."""

from abc import ABC
from typing import Literal

import numpy as np
import numpy.typing as npt

from aisp.exceptions import FeatureDimensionMismatch
from ..base import BaseClassifier


class BaseAIRS(BaseClassifier, ABC):
    """
    Base class for the AIRS algorithm.

    The base class contains functions that are used by more than one class in the package, and
    are therefore considered essential for the overall functioning of the system.
    """

    @staticmethod
    def _check_and_raise_exceptions_fit(
        X: npt.NDArray = None,
        y: npt.NDArray = None,
        algorithm: Literal[
            "continuous-features", "binary-features"
        ] = "continuous-features"
    ):
        """
        Verify the fit parameters and throw exceptions if the verification is not successful.

        Parameters
        ----------
        X : npt.NDArray
            Training array, containing the samples and their characteristics,
            [``N samples`` (rows)][``N features`` (columns)].
        y : npt.NDArray
            Array of target classes of ``X`` with [``N samples`` (rows)].
        algorithm : Literal["continuous-features", "binary-features"], default="continuous-features"
            Specifies the type of algorithm to use, depending on whether the input data has
            continuous or binary features.

        Raises
        ------
        TypeError
            If X or y are not ndarrays or have incompatible shapes.
        ValueError
            If algorithm is binary-features and X contains values that are not composed only
            of 0 and 1.
        """
        if not isinstance(X, np.ndarray):
            if isinstance(X, list):
                X = np.array(X)
            else:
                raise TypeError("X is not an ndarray or list.")
        elif not isinstance(y, np.ndarray):
            if isinstance(y, list):
                y = np.array(y)
            else:
                raise TypeError("y is not an ndarray or list.")
        if X.shape[0] != y.shape[0]:
            raise TypeError(
                "X does not have the same amount of sample for the output classes in y."
            )

        if algorithm == "binary-features" and not np.isin(X, [0, 1]).all():
            raise ValueError(
                "The array X contains values that are not composed only of 0 and 1."
            )

    @staticmethod
    def _check_and_raise_exceptions_predict(
        X: npt.NDArray = None,
        expected: int = 0,
        algorithm: Literal[
            "continuous-features", "binary-features"
        ] = "continuous-features"
    ) -> None:
        """
        Verify the predict parameters and throw exceptions if the verification is not successful.

        Parameters
        ----------
        X : npt.NDArray
            Input array for prediction, containing the samples and their characteristics,
            [``N samples`` (rows)][``N features`` (columns)].
        expected : int, default=0
            Expected number of features per sample (columns in X).
        algorithm : Literal["continuous-features", "binary-features"], default="continuous-features"
            Specifies the type of algorithm to use, depending on whether the input data has
            continuous or binary features.

        Raises
        ------
        TypeError
            If X is not an ndarray or list.
        FeatureDimensionMismatch
            If the number of features in X does not match the expected number.
        ValueError
            If algorithm is binary-features and X contains values that are not composed only
            of 0 and 1.
        """
        if not isinstance(X, (np.ndarray, list)):
            raise TypeError("X is not an ndarray or list")
        if expected != len(X[0]):
            raise FeatureDimensionMismatch(
                expected,
                len(X[0]),
                "X"
            )

        if algorithm != "binary-features":
            return

        # Checks if matrix X contains only binary samples. Otherwise, raises an exception.
        if not np.isin(X, [0, 1]).all():
            raise ValueError(
                "The array X contains values that are not composed only of 0 and 1."
            )
```
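Since both validators are static methods, they can be exercised without instantiating a classifier. A small sketch of the binary-feature check (toy arrays; note that `aisp.csa._base` is an internal module, so this import path is for illustration only):

```python
import numpy as np

from aisp.csa._base import BaseAIRS

X_ok = np.array([[0, 1, 1], [1, 0, 1]])
y = np.array([0, 1])
BaseAIRS._check_and_raise_exceptions_fit(X_ok, y, "binary-features")  # passes silently

try:
    # A non-binary value triggers the ValueError branch.
    BaseAIRS._check_and_raise_exceptions_fit(np.array([[0.2, 1.0, 0.5]]), np.array([0]), "binary-features")
except ValueError as err:
    print(err)  # "The array X contains values that are not composed only of 0 and 1."
```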
aisp/csa/_cell.py
ADDED
@@ -0,0 +1,47 @@

```python
"""Represents a memory B-cell."""

from dataclasses import dataclass
from typing import Literal

import numpy as np
import numpy.typing as npt

from ..base.mutation import clone_and_mutate_continuous, clone_and_mutate_binary


@dataclass(slots=True)
class Cell:
    """
    Represents a memory B-cell.

    Attributes
    ----------
    vector : npt.NDArray
        A vector of cell features.
    """

    vector: np.ndarray

    def hyper_clonal_mutate(
        self,
        n: int,
        algorithm: Literal["continuous-features", "binary-features"] = "continuous-features"
    ) -> npt.NDArray:
        """
        Clones N features from a cell's features, generating a set of mutated vectors.

        Parameters
        ----------
        n : int
            Number of clones to be generated from mutations of the original cell.
        algorithm : Literal["continuous-features", "binary-features"], default="continuous-features"
            Specifies the type of algorithm to use based on the nature of the input features.

        Returns
        -------
        npt.NDArray
            An array containing N mutated vectors from the original cell.
        """
        if algorithm == "binary-features":
            return clone_and_mutate_binary(self.vector, n)
        return clone_and_mutate_continuous(self.vector, n)
```
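Because `Cell` is a plain dataclass, the clonal expansion entry point can also be exercised directly; a brief sketch with toy values (`aisp.csa._cell` is an internal module, shown here only to illustrate the dispatch to the Numba helpers above):

```python
import numpy as np

from aisp.csa._cell import Cell

cell = Cell(np.array([0.1, 0.9, 0.5]))
mutants = cell.hyper_clonal_mutate(3, "continuous-features")
print(mutants.shape)  # (3, 3): three mutated variants of the cell's feature vector
```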
{aisp-0.1.42.dist-info → aisp-0.2.0.dist-info}/METADATA

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aisp
-Version: 0.1.42
+Version: 0.2.0
 Summary: Package with techniques of artificial immune systems.
 Author-email: João Paulo da Silva Barros <jpsilvabarr@gmail.com>
 Maintainer-email: Alison Zille Lopes <alisonzille@gmail.com>
@@ -79,7 +79,8 @@ Artificial Immune Systems (AIS) are inspired by the vertebrate immune system, cr
 ##### Algorithms implemented:
 
 > - [x] [**Negative Selection.**](https://ais-package.github.io/docs/aisp-techniques/Negative%20Selection/)
-> - [ ] *Clonal Selection Algorithms.*
+> - [x] **Clonal Selection Algorithms.**
+>   * [AIRS - Artificial Immune Recognition System](https://ais-package.github.io/docs/aisp-techniques/Clonal%20Selection%20Algorithms/)
 > - [ ] *Dendritic Cells.*
 > - [ ] *Immune Network Theory.*
 
@@ -126,22 +127,14 @@ pip install aisp
 
 ---
 
-
+Explore the example notebooks available in the [AIS-Package/aisp repository](https://github.com/AIS-Package/aisp/tree/main/examples).
+These notebooks demonstrate how to utilize the package's functionalities in various scenarios, including applications of the RNSA,
+BNSA and AIRS algorithms on datasets such as Iris, Geyser, and Mushrooms.
 
-
+You can run the notebooks directly in your browser without any local installation using Binder:
 
-
-
-
-##### **Negative Selection:**
-
-+ **RNSA** Application of negative selection techniques for classification using the Iris family flower database and Old Faithful Geyser:
-+ [iris_dataBase_example](https://github.com/AIS-Package/aisp/blob/main/examples/RNSA/iris_dataBase_example_en.ipynb)
-+ [geyser_dataBase_example](https://github.com/AIS-Package/aisp/blob/main/examples/RNSA/geyser_dataBase_example_en.ipynb)
-+ **BNSA**
-+ [mushrooms_dataBase_example](https://github.com/AIS-Package/aisp/blob/main/examples/BNSA/mushrooms_dataBase_example_en.ipynb)
-
----
+[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/AIS-Package/aisp/HEAD?labpath=%2Fexamples)
 
+> 💡 **Tip**: Binder may take a few minutes to load the environment, especially on the first launch.
 </section>
 </section>
```
{aisp-0.1.42.dist-info → aisp-0.2.0.dist-info}/RECORD

```diff
@@ -2,6 +2,11 @@ aisp/__init__.py,sha256=N5aAyup46_tqU9cXfYfGuR3bdfAjcvaPc1xwFdGdD7A,112
 aisp/exceptions.py,sha256=M2H_oM-ccIkDGpeFA3CyklZlgMcjTVvOCTGLU2sxFi8,1447
 aisp/base/__init__.py,sha256=k2Ww9hej_32ekYhhCiYGEMLgOmDKwRt261HZ8rEurwA,102
 aisp/base/_classifier.py,sha256=_HJiL1fCNqoB5KlNUN5pH9Yuu_btOze39h0SdnBw7ug,3672
+aisp/base/mutation.py,sha256=j_2WiZDxUS3KS4QgGXaFqoLVSxSz88BpLfZTjLuGaSU,3110
+aisp/csa/__init__.py,sha256=cJSKkbvNTpR_CKCL--h99fNPiMf3fJ73gFnZRq7uyVM,355
+aisp/csa/_ai_immune_recognition_sys.py,sha256=0f8DQzZ7lG69xCMI1jpR0QBKZ4oNvRXpayQMUekzC5o,19233
+aisp/csa/_base.py,sha256=y1OX0Z0ZGQu63fQmg1umMZ1110H8bkStP5NaGNOvgmY,4399
+aisp/csa/_cell.py,sha256=PhGdXKytRYnV97pmaLLKVhaV_OwU31-92URZVMszohY,1377
 aisp/nsa/__init__.py,sha256=3cXuBmO-_Dp3-8ZG3Eu8e_bD1JDb-RH4Wu0UDNVD1bs,385
 aisp/nsa/_base.py,sha256=D_N-VIESvGFhdf_A2NETV-JaZJ6ISankrbRzWXSMiXM,4140
 aisp/nsa/_negative_selection.py,sha256=-hqMspYvtPAb38qV1_NF5HmDCOGDWGL89BZ3M4eHiao,28141
@@ -11,8 +16,8 @@ aisp/utils/_multiclass.py,sha256=nWd58ayVfxgdopBQc9b_xywkolJ2fGW3AN-JoD2A9Fw,113
 aisp/utils/distance.py,sha256=pIt76OUiwCry6eNEuWLYvUiW4KkeU6egjjnnmroFet8,6556
 aisp/utils/metrics.py,sha256=zDAScDbHRnfu24alRcZ6fEIUaWNoCD-QCtOCFBWPPo8,1277
 aisp/utils/sanitizers.py,sha256=u1GizdJ-RKfPWJLnuFiM09lpItZMhDR_EvK8YdVHwDk,1858
-aisp-0.1.42.dist-info/licenses/LICENSE,sha256=…
-aisp-0.1.42.dist-info/METADATA,sha256=…
-aisp-0.1.42.dist-info/WHEEL,sha256=…
-aisp-0.1.42.dist-info/top_level.txt,sha256=…
-aisp-0.1.42.dist-info/RECORD,,
+aisp-0.2.0.dist-info/licenses/LICENSE,sha256=fTqV5eBpeAZO0_jit8j4Ref9ikBSlHJ8xwj5TLg7gFk,7817
+aisp-0.2.0.dist-info/METADATA,sha256=5jLC17E3FIl_8ewjhK5hkJQ7YsJFP_a-bMW8EHDCDSc,4631
+aisp-0.2.0.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+aisp-0.2.0.dist-info/top_level.txt,sha256=Q5aJi_rAVT5UNS1As0ZafoyS5dwNibnoyOYV7RWUB9s,5
+aisp-0.2.0.dist-info/RECORD,,
```
{aisp-0.1.42.dist-info → aisp-0.2.0.dist-info}/WHEEL
File without changes

{aisp-0.1.42.dist-info → aisp-0.2.0.dist-info}/licenses/LICENSE
File without changes

{aisp-0.1.42.dist-info → aisp-0.2.0.dist-info}/top_level.txt
File without changes