aisp 0.2.1__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aisp/__init__.py +25 -3
- aisp/base/__init__.py +3 -1
- aisp/base/_base.py +65 -0
- aisp/base/_classifier.py +5 -16
- aisp/base/_clusterer.py +77 -0
- aisp/csa/__init__.py +1 -1
- aisp/csa/{_ai_immune_recognition_sys.py → _ai_recognition_sys.py} +27 -29
- aisp/csa/_base.py +0 -1
- aisp/ina/__init__.py +14 -0
- aisp/ina/_ai_network.py +552 -0
- aisp/ina/_base.py +124 -0
- aisp/nsa/__init__.py +2 -1
- aisp/nsa/_binary_negative_selection.py +239 -0
- aisp/nsa/_negative_selection.py +22 -253
- aisp/utils/distance.py +4 -4
- aisp/utils/validation.py +5 -5
- {aisp-0.2.1.dist-info → aisp-0.3.1.dist-info}/METADATA +6 -5
- aisp-0.3.1.dist-info/RECORD +31 -0
- aisp-0.2.1.dist-info/RECORD +0 -25
- {aisp-0.2.1.dist-info → aisp-0.3.1.dist-info}/WHEEL +0 -0
- {aisp-0.2.1.dist-info → aisp-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {aisp-0.2.1.dist-info → aisp-0.3.1.dist-info}/top_level.txt +0 -0
aisp/ina/_ai_network.py
ADDED
@@ -0,0 +1,552 @@
|
|
1
|
+
"""Artificial Immune Network (AiNet)."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
from collections import Counter
|
5
|
+
from heapq import nlargest
|
6
|
+
from typing import Optional
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
import numpy.typing as npt
|
10
|
+
from scipy.sparse.csgraph import minimum_spanning_tree, connected_components
|
11
|
+
from scipy.spatial.distance import squareform, pdist, cdist
|
12
|
+
from tqdm import tqdm
|
13
|
+
|
14
|
+
from ._base import BaseAiNet
|
15
|
+
from ..base import set_seed_numba
|
16
|
+
from ..base.mutation import clone_and_mutate_binary, clone_and_mutate_continuous, \
|
17
|
+
clone_and_mutate_ranged
|
18
|
+
from ..utils.sanitizers import sanitize_choice, sanitize_param, sanitize_seed
|
19
|
+
from ..utils.distance import hamming, compute_metric_distance, get_metric_code
|
20
|
+
from ..utils.types import FeatureType, MetricType
|
21
|
+
from ..utils.validation import detect_vector_data_type
|
22
|
+
|
23
|
+
|
24
|
+
class AiNet(BaseAiNet):
|
25
|
+
"""Artificial Immune Network for Compression and Clustering.
|
26
|
+
|
27
|
+
This class implements the aiNet algorithm, an artificial immune network model designed for
|
28
|
+
clustering and data compression tasks. The aiNet algorithm uses principles from immune
|
29
|
+
network theory, clonal selection, and affinity maturation to compress high-dimensional
|
30
|
+
datasets. [1]_
|
31
|
+
For clustering, the class uses SciPy’s implementation of the **Minimum Spanning Tree**
|
32
|
+
(MST) to remove the most distant nodes and separate the groups. [2]_
|
33
|
+
|
34
|
+
Parameters
|
35
|
+
----------
|
36
|
+
N : int, default=50
|
37
|
+
Number of memory cells (antibodies) in the population.
|
38
|
+
n_clone : int, default=10
|
39
|
+
Number of clones generated for each selected memory cell.
|
40
|
+
top_clonal_memory_size : Optional[int], default=5
|
41
|
+
Number of highest-affinity antibodies selected per antigen for cloning and mutation.
|
42
|
+
If set to None or 0, all antibodies are cloned, following the original aiNet algorithm.
|
43
|
+
n_diversity_injection : int, default=5
|
44
|
+
Number of new random memory cells injected to maintain diversity.
|
45
|
+
affinity_threshold : float, default=0.5
|
46
|
+
Threshold for affinity (similarity) to determine cell suppression or selection.
|
47
|
+
suppression_threshold : float, default=0.5
|
48
|
+
Threshold for suppressing similar memory cells.
|
49
|
+
mst_inconsistency_factor : float, default=2.0
|
50
|
+
Factor used to determine which edges in the **Minimum Spanning Tree (MST)**
|
51
|
+
are considered inconsistent.
|
52
|
+
max_iterations : int, default=10
|
53
|
+
Maximum number of training iterations.
|
54
|
+
k : int, default=3
|
55
|
+
The number of K nearest neighbors that will be used to choose a label in the prediction.
|
56
|
+
metric : Literal["manhattan", "minkowski", "euclidean"], default="euclidean"
|
57
|
+
Metric used to calculate the distance between an antibody and a sample:
|
58
|
+
|
59
|
+
* ``'euclidean'`` ➜ The calculation of the distance is given by the expression:
|
60
|
+
√( (x₁ – y₁)² + (x₂ – y₂)² + ... + (xn – yn)²).
|
61
|
+
|
62
|
+
* ``'minkowski'`` ➜ The calculation of the distance is given by the expression:
|
63
|
+
( |X₁ – Y₁|ᵖ + |X₂ – Y₂|ᵖ + ... + |Xn – Yn|ᵖ )¹/ᵖ.
|
64
|
+
|
65
|
+
* ``'manhattan'`` ➜ The calculation of the distance is given by the expression:
|
66
|
+
( |x₁ – y₁| + |x₂ – y₂| + ... + |xn – yn|).
|
67
|
+
|
68
|
+
seed : Optional[int]
|
69
|
+
Seed for the random generation of antibody values. Defaults to None.
|
70
|
+
use_mst_clustering : bool, default=True
|
71
|
+
If ``True``, performs clustering with **Minimum Spanning Tree** (MST). If ``False``,
|
72
|
+
does not perform clustering and predict returns None.
|
73
|
+
**kwargs
|
74
|
+
p : float
|
75
|
+
This parameter stores the value of ``p`` used in the Minkowski distance. The default
|
76
|
+
is ``2``, which corresponds to the Euclidean distance.\
|
77
|
+
Different values of p lead to different variants of the Minkowski Distance.
|
78
|
+
|
79
|
+
References
|
80
|
+
----------
|
81
|
+
.. [1] De Castro, Leandro & José, Fernando & von Zuben, Antonio Augusto. (2001). aiNet: An
|
82
|
+
Artificial Immune Network for Data Analysis.
|
83
|
+
Available at:
|
84
|
+
https://www.researchgate.net/publication/
|
85
|
+
228378350_aiNet_An_Artificial_Immune_Network_for_Data_Analysis
|
86
|
+
.. [2] SciPy Documentation. *Minimum Spanning Tree*.
|
87
|
+
https://docs.scipy.org/doc/scipy/reference/generated/
|
88
|
+
scipy.sparse.csgraph.minimum_spanning_tree
|
89
|
+
"""
|
90
|
+
|
91
|
+
def __init__(
    self,
    N: int = 50,
    n_clone: int = 10,
    top_clonal_memory_size: Optional[int] = 5,
    n_diversity_injection: int = 5,
    affinity_threshold: float = 0.5,
    suppression_threshold: float = 0.5,
    mst_inconsistency_factor: float = 2.0,
    max_iterations: int = 10,
    k: int = 3,
    metric: MetricType = "euclidean",
    seed: Optional[int] = None,
    use_mst_clustering: bool = True,
    **kwargs
):
    """Configure the network hyper-parameters and reset all fitted state.

    Every numeric hyper-parameter goes through ``sanitize_param`` together with
    a fallback value and a validity predicate. The fallback passed here is always
    the default advertised in the signature and in the class docstring.

    See the class docstring for the meaning of each parameter.
    """
    self.N: int = sanitize_param(N, 50, lambda x: x > 0)
    self.n_clone: int = sanitize_param(n_clone, 10, lambda x: x > 0)

    # ``None`` and ``0`` both mean "clone every antibody" (see
    # ``_select_and_clone_population``), so ``0`` must be accepted here instead
    # of being replaced by the fallback.
    self.top_clonal_memory_size: Optional[int] = None
    if top_clonal_memory_size is not None:
        self.top_clonal_memory_size = sanitize_param(
            top_clonal_memory_size, 5, lambda x: x >= 0
        )

    self.n_diversity_injection: int = sanitize_param(
        n_diversity_injection, 5, lambda x: x > 0
    )
    self.affinity_threshold: float = sanitize_param(
        affinity_threshold, 0.5, lambda x: x > 0
    )
    self.suppression_threshold: float = sanitize_param(
        suppression_threshold, 0.5, lambda x: x > 0
    )
    # The fallback is a float so that it matches the declared parameter type.
    # NOTE(review): if ``sanitize_param`` compares the value's type with the
    # fallback's type, an int fallback would reject float factors - confirm.
    self.mst_inconsistency_factor: float = sanitize_param(
        mst_inconsistency_factor, 2.0, lambda x: x >= 0
    )
    self.max_iterations: int = sanitize_param(max_iterations, 10, lambda x: x > 0)
    # Fallback aligned with the documented default (k=3).
    self.k: int = sanitize_param(k, 3, lambda x: x > 0)
    self.seed: Optional[int] = sanitize_seed(seed)
    self.use_mst_clustering: bool = use_mst_clustering
    if self.seed is not None:
        # Seeds NumPy's global generator and the numba-side generator.
        np.random.seed(self.seed)
        set_seed_numba(self.seed)

    # The real feature type is only known once ``fit`` inspects the data;
    # ``fit`` is also where the metric is switched to Hamming for binary input.
    self._feature_type: FeatureType = "continuous-features"
    self.metric: str = sanitize_choice(
        metric, ["euclidean", "manhattan", "minkowski"], "euclidean"
    )

    self.p: np.float64 = np.float64(kwargs.get("p", 2.0))
    # Extra keyword arguments forwarded to SciPy's ``cdist``/``pdist``.
    self._metric_params = {}
    if self.metric == "minkowski":
        self._metric_params['p'] = self.p

    # State populated by ``fit`` / ``update_clusters``.
    self.classes = []
    self._memory_network: dict = {}
    self._population_antibodies: Optional[npt.NDArray] = None
    self._n_features: int = 0
    self._bounds: Optional[npt.NDArray[np.float64]] = None
    self._mst_structure: Optional[npt.NDArray] = None
    self._mst_mean_distance: Optional[float] = None
    self._mst_std_distance: Optional[float] = None
    self._predict_cells = None
    self._predict_labels = None
|
157
|
+
|
158
|
+
@property
def memory_network(self) -> dict:
    """Return the immune network as a mapping of cluster label to antibodies.

    The mapping is rebuilt by ``update_clusters`` and is an empty dict until
    that method has run.
    """
    return self._memory_network
|
162
|
+
|
163
|
+
@property
def population_antibodies(self) -> Optional[npt.NDArray]:
    """Return the set of memory antibodies, or ``None`` before ``fit`` has run."""
    return self._population_antibodies
|
167
|
+
|
168
|
+
@property
def mst(self) -> dict:
    """Return the Minimum Spanning Tree together with its edge statistics.

    Returns
    -------
    dict
        ``'graph'`` holds the MST matrix, ``'mean_distance'`` and
        ``'std_distance'`` hold the mean and standard deviation of its edge
        weights. All three are ``None`` until the MST has been built.
    """
    return dict(
        graph=self._mst_structure,
        mean_distance=self._mst_mean_distance,
        std_distance=self._mst_std_distance,
    )
|
176
|
+
|
177
|
+
def fit(self, X: npt.NDArray, verbose: bool = True) -> AiNet:
|
178
|
+
"""
|
179
|
+
Train the AiNet model on input data.
|
180
|
+
|
181
|
+
Parameters
|
182
|
+
----------
|
183
|
+
X : npt.NDArray
|
184
|
+
Input data used for training the model.
|
185
|
+
verbose : bool, default=True
|
186
|
+
Feedback from the progress bar showing current training interaction details.
|
187
|
+
|
188
|
+
Returns
|
189
|
+
-------
|
190
|
+
self : AiNet
|
191
|
+
Returns the instance of the class that implements this method.
|
192
|
+
"""
|
193
|
+
self._feature_type = detect_vector_data_type(X)
|
194
|
+
|
195
|
+
super()._check_and_raise_exceptions_fit(X)
|
196
|
+
|
197
|
+
match self._feature_type:
|
198
|
+
case "binary-features":
|
199
|
+
X = X.astype(np.bool_)
|
200
|
+
self.metric = "hamming"
|
201
|
+
case "ranged-features":
|
202
|
+
self._bounds = np.vstack([np.min(X, axis=0), np.max(X, axis=0)])
|
203
|
+
|
204
|
+
self._n_features = X.shape[1]
|
205
|
+
|
206
|
+
progress = tqdm(
|
207
|
+
total=self.max_iterations,
|
208
|
+
postfix="\n",
|
209
|
+
disable=not verbose,
|
210
|
+
bar_format="{desc} ┇{bar}┇ {n}/{total} total training interactions",
|
211
|
+
)
|
212
|
+
|
213
|
+
population_p = self._init_population_antibodies()
|
214
|
+
|
215
|
+
t: int = 1
|
216
|
+
while t <= self.max_iterations:
|
217
|
+
pool_memory = []
|
218
|
+
permutations = np.random.permutation(X.shape[0])
|
219
|
+
for antigen in X[permutations]:
|
220
|
+
clonal_memory = self._select_and_clone_population(antigen, population_p)
|
221
|
+
pool_memory.extend(self._clonal_suppression(antigen, clonal_memory))
|
222
|
+
pool_memory = self._memory_suppression(pool_memory)
|
223
|
+
|
224
|
+
if t < self.max_iterations:
|
225
|
+
pool_memory.extend(self._diversity_introduction())
|
226
|
+
population_p = np.asarray(pool_memory)
|
227
|
+
|
228
|
+
progress.update(1)
|
229
|
+
|
230
|
+
t += 1
|
231
|
+
self._population_antibodies = population_p
|
232
|
+
|
233
|
+
if self.use_mst_clustering:
|
234
|
+
self._build_mst()
|
235
|
+
self.update_clusters()
|
236
|
+
progress.set_description(
|
237
|
+
f"\033[92m✔ Set of memory antibodies for classes "
|
238
|
+
f"({', '.join(map(str, self.classes))}) successfully generated | "
|
239
|
+
f"Clusters: {len(self.classes)} | Population of antibodies size: "
|
240
|
+
f"{len(self._population_antibodies)}\033[0m"
|
241
|
+
)
|
242
|
+
progress.close()
|
243
|
+
|
244
|
+
return self
|
245
|
+
|
246
|
+
def predict(self, X) -> Optional[npt.NDArray]:
    """
    Predict cluster labels for input data.

    Each sample is labelled by a majority vote among the ``k`` memory
    antibodies with the highest affinity to it.

    Parameters
    ----------
    X : npt.NDArray
        Data to predict.

    Returns
    -------
    Predictions : Optional[npt.NDArray]
        Predicted cluster labels, or None if clustering is disabled or no
        clusters are available yet.
    """
    # ``_memory_network`` starts as an empty dict, never ``None``; test for
    # emptiness so that a model without clusters returns None instead of
    # failing on an empty vote further down.
    if not self.use_mst_clustering or not self._memory_network:
        return None

    self._check_and_raise_exceptions_predict(
        X, self._n_features, self._feature_type
    )

    # Flatten the network into (cluster label, antibody) pairs once.
    all_cells_memory = [
        (class_name, cell)
        for class_name in self.classes
        for cell in self._memory_network[class_name]
    ]

    predictions: list = []
    for line in X:
        label_stim_list = [
            (class_name, self._affinity(memory, line))
            for class_name, memory in all_cells_memory
        ]
        # Keep the k most stimulated antibodies and pick the most voted label.
        k_nearest = nlargest(self.k, label_stim_list, key=lambda x: x[1])
        votes = Counter(label for label, _ in k_nearest)
        predictions.append(votes.most_common(1)[0][0])
    return np.array(predictions)
|
285
|
+
|
286
|
+
def _init_population_antibodies(self) -> npt.NDArray:
|
287
|
+
"""
|
288
|
+
Initialize the antibody set of the network population randomly.
|
289
|
+
|
290
|
+
Returns
|
291
|
+
-------
|
292
|
+
npt.NDArray
|
293
|
+
List of initialized memories.
|
294
|
+
"""
|
295
|
+
return self._generate_random_antibodies(
|
296
|
+
self.N,
|
297
|
+
self._n_features,
|
298
|
+
self._feature_type,
|
299
|
+
self._bounds
|
300
|
+
)
|
301
|
+
|
302
|
+
def _select_and_clone_population(
|
303
|
+
self,
|
304
|
+
antigen: npt.NDArray,
|
305
|
+
population: npt.NDArray
|
306
|
+
) -> list:
|
307
|
+
"""
|
308
|
+
Select top antibodies by affinity and generate mutated clones.
|
309
|
+
|
310
|
+
Parameters
|
311
|
+
----------
|
312
|
+
antigen : npt.NDArray
|
313
|
+
The antigen for which affinities will be calculated.
|
314
|
+
population: list
|
315
|
+
The list of antibodies (solutions) to be evaluated and cloned.
|
316
|
+
|
317
|
+
Returns
|
318
|
+
-------
|
319
|
+
list[npt.NDArray]
|
320
|
+
List of mutated clones.
|
321
|
+
"""
|
322
|
+
affinities = self._calculate_affinities(antigen, population)
|
323
|
+
|
324
|
+
if self.top_clonal_memory_size is not None and self.top_clonal_memory_size > 0:
|
325
|
+
selected_idxs = np.argsort(-affinities)[:self.top_clonal_memory_size]
|
326
|
+
else:
|
327
|
+
selected_idxs = np.arange(affinities.shape[0])
|
328
|
+
|
329
|
+
clonal_m = []
|
330
|
+
for i in selected_idxs:
|
331
|
+
clones = self._clone_and_mutate(
|
332
|
+
population[i],
|
333
|
+
int(self.n_clone * affinities[i])
|
334
|
+
)
|
335
|
+
clonal_m.extend(clones)
|
336
|
+
|
337
|
+
return clonal_m
|
338
|
+
|
339
|
+
def _clonal_suppression(self, antigen: npt.NDArray, clones: list):
|
340
|
+
"""
|
341
|
+
Suppresses redundant clones based on affinity thresholds.
|
342
|
+
|
343
|
+
This function removes clones whose affinity with the antigen is lower than the defined
|
344
|
+
threshold (affinity_threshold) and eliminates redundant clones whose similarity with the
|
345
|
+
clones already selected exceeds the suppression threshold (suppression_threshold).
|
346
|
+
|
347
|
+
Parameters
|
348
|
+
----------
|
349
|
+
antigen : npt.NDArray
|
350
|
+
The antigen for which affinities will be calculated.
|
351
|
+
clones : list
|
352
|
+
The list of candidate clones to be suppressed.
|
353
|
+
|
354
|
+
Returns
|
355
|
+
-------
|
356
|
+
list
|
357
|
+
Non-redundant, high-affinity clones.
|
358
|
+
"""
|
359
|
+
suppression_affinity = [
|
360
|
+
clone for clone in clones
|
361
|
+
if self._affinity(clone, antigen) > self.affinity_threshold
|
362
|
+
]
|
363
|
+
return self._memory_suppression(suppression_affinity)
|
364
|
+
|
365
|
+
def _memory_suppression(self, pool_memory: list) -> list:
|
366
|
+
"""
|
367
|
+
Remove redundant antibodies from memory pool.
|
368
|
+
|
369
|
+
Calculate the affinity between all memory antibodies and remove redundant antibodies
|
370
|
+
whose similarity exceeds the suppression threshold.
|
371
|
+
|
372
|
+
Parameters
|
373
|
+
----------
|
374
|
+
pool_memory : list
|
375
|
+
antibodies memory.
|
376
|
+
|
377
|
+
Returns
|
378
|
+
-------
|
379
|
+
list
|
380
|
+
Memory pool without redundant antibodies.
|
381
|
+
"""
|
382
|
+
if not pool_memory:
|
383
|
+
return []
|
384
|
+
suppressed_memory = [pool_memory[0]]
|
385
|
+
for candidate in pool_memory[1:]:
|
386
|
+
affinities = self._calculate_affinities(
|
387
|
+
candidate.reshape(1, -1),
|
388
|
+
np.asarray(suppressed_memory)
|
389
|
+
)
|
390
|
+
|
391
|
+
if not np.any(affinities > self.suppression_threshold):
|
392
|
+
suppressed_memory.append(candidate)
|
393
|
+
return suppressed_memory
|
394
|
+
|
395
|
+
def _diversity_introduction(self):
|
396
|
+
"""
|
397
|
+
Introduce diversity into the antibody population.
|
398
|
+
|
399
|
+
Returns
|
400
|
+
-------
|
401
|
+
npt.NDArray
|
402
|
+
Array of new random antibodies for diversity introduction.
|
403
|
+
"""
|
404
|
+
return self._generate_random_antibodies(
|
405
|
+
self.n_diversity_injection,
|
406
|
+
self._n_features,
|
407
|
+
self._feature_type,
|
408
|
+
self._bounds
|
409
|
+
)
|
410
|
+
|
411
|
+
def _affinity(self, u: npt.NDArray, v: npt.NDArray) -> float:
    """
    Compute the affinity (stimulus) between two vectors.

    The distance is the Hamming distance for binary features and the configured
    metric otherwise; it is then mapped to an affinity that decreases as the
    distance grows.

    Parameters
    ----------
    u : npt.NDArray
        Coordinates of the first point.
    v : npt.NDArray
        Coordinates of the second point.

    Returns
    -------
    float
        Affinity score, where higher means more similar.
    """
    is_binary = self._feature_type == "binary-features"
    distance: float = (
        hamming(u, v)
        if is_binary
        else compute_metric_distance(u, v, get_metric_code(self.metric), self.p)
    )

    return 1 - (distance / (1 + distance))
|
436
|
+
|
437
|
+
def _calculate_affinities(self, u: npt.NDArray, v: npt.NDArray) -> npt.NDArray:
|
438
|
+
"""
|
439
|
+
Calculate the affinity matrix between a reference vector and a set of target vectors.
|
440
|
+
|
441
|
+
Parameters
|
442
|
+
----------
|
443
|
+
u : npt.NDArray
|
444
|
+
An array with shape (n_features).
|
445
|
+
v : npt.NDArray
|
446
|
+
An array of vectors with shape (n_samples, n_features).
|
447
|
+
|
448
|
+
|
449
|
+
Returns
|
450
|
+
-------
|
451
|
+
npt.NDArray
|
452
|
+
One-dimensional array of shape (n_samples,), containing the affinities between `u`
|
453
|
+
and each vector in `v`.
|
454
|
+
"""
|
455
|
+
u = np.reshape(u, (1, -1))
|
456
|
+
v = np.atleast_2d(v)
|
457
|
+
distances = cdist(u, v, metric=self.metric, **self._metric_params)[0]
|
458
|
+
|
459
|
+
return 1 - (distances / (1 + distances))
|
460
|
+
|
461
|
+
def _clone_and_mutate(self, antibody: npt.NDArray, n_clone: int) -> npt.NDArray:
|
462
|
+
"""
|
463
|
+
Generate mutated clones from an antibody, based on the feature type.
|
464
|
+
|
465
|
+
Parameters
|
466
|
+
----------
|
467
|
+
antibody : npt.NDArray
|
468
|
+
Original antibody vector to be cloned and mutated.
|
469
|
+
n_clone : int
|
470
|
+
Number of clones to generate.
|
471
|
+
|
472
|
+
Returns
|
473
|
+
-------
|
474
|
+
npt.NDArray
|
475
|
+
Array of shape (n_clone, len(antibody)) containing mutated clones
|
476
|
+
"""
|
477
|
+
if self._feature_type == "binary-features":
|
478
|
+
return clone_and_mutate_binary(antibody, n_clone)
|
479
|
+
if self._feature_type == "ranged-features" and self._bounds is not None:
|
480
|
+
return clone_and_mutate_ranged(antibody, n_clone, self._bounds)
|
481
|
+
return clone_and_mutate_continuous(antibody, n_clone)
|
482
|
+
|
483
|
+
def _build_mst(self):
|
484
|
+
"""Construct the Minimum Spanning Tree (MST) for the antibody population.
|
485
|
+
|
486
|
+
Computes the pairwise distances between antibodies, builds the MST from
|
487
|
+
these distances, and stores the MST structure along with the mean and
|
488
|
+
standard deviation of its edge weights.
|
489
|
+
|
490
|
+
Raises
|
491
|
+
------
|
492
|
+
ValueError
|
493
|
+
If the antibody population is empty.
|
494
|
+
"""
|
495
|
+
if self._population_antibodies is None or len(self._population_antibodies) == 0:
|
496
|
+
raise ValueError("Population of antibodies is empty")
|
497
|
+
|
498
|
+
antibodies_matrix = squareform(
|
499
|
+
pdist(self._population_antibodies, metric=self.metric, **self._metric_params)
|
500
|
+
)
|
501
|
+
antibodies_mst = minimum_spanning_tree(antibodies_matrix).toarray()
|
502
|
+
self._mst_structure = antibodies_mst
|
503
|
+
nonzero_edges = antibodies_mst[antibodies_mst > 0]
|
504
|
+
self._mst_mean_distance = float(np.mean(nonzero_edges)) if nonzero_edges.size else 0.0
|
505
|
+
self._mst_std_distance = float(np.std(nonzero_edges)) if nonzero_edges.size else 0.0
|
506
|
+
|
507
|
+
def update_clusters(self, mst_inconsistency_factor: Optional[float] = None):
    """Split the antibody population into clusters by pruning the MST.

    Every MST edge heavier than ``mean + mst_inconsistency_factor * std`` of
    the MST edge weights is removed from a copy of the tree. Each connected
    component that remains becomes one cluster.

    Parameters
    ----------
    mst_inconsistency_factor : float, optional
        If provided, replaces the stored inconsistency factor before pruning.

    Raises
    ------
    ValueError
        If the Minimum Spanning Tree (MST) has not yet been created

    Updates
    -------
    self._memory_network : dict[int, npt.NDArray]
        Dictionary mapping cluster labels to antibody arrays.
    self.classes : npt.NDArray
        Array of cluster labels.
    """
    if self._mst_structure is None:
        raise ValueError("The Minimum Spanning Tree (MST) has not yet been created.")

    if mst_inconsistency_factor is not None:
        self.mst_inconsistency_factor = mst_inconsistency_factor

    cutoff = (
        self._mst_mean_distance + self.mst_inconsistency_factor * self._mst_std_distance
    )
    # Work on a pruned copy so the stored MST can be re-used with other factors.
    pruned = np.where(self._mst_structure > cutoff, 0, self._mst_structure)

    n_clusters, labels = connected_components(csgraph=pruned, directed=False)

    network = {}
    for label in range(n_clusters):
        network[label] = self._population_antibodies[labels == label]

    self._memory_network = network
    self.classes = np.array(list(network.keys()))
|
aisp/ina/_base.py
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
"""Base Class for Network Theory Algorithms."""
|
2
|
+
|
3
|
+
from abc import ABC
|
4
|
+
from typing import Optional
|
5
|
+
|
6
|
+
from numpy import typing as npt
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
|
10
|
+
from ..base import BaseClusterer
|
11
|
+
from ..exceptions import FeatureDimensionMismatch
|
12
|
+
from ..utils.types import FeatureType
|
13
|
+
|
14
|
+
|
15
|
+
class BaseAiNet(BaseClusterer, ABC):
|
16
|
+
"""Abstract base class for AINet-based clustering algorithms."""
|
17
|
+
|
18
|
+
@staticmethod
|
19
|
+
def _check_and_raise_exceptions_fit(
|
20
|
+
X: npt.NDArray
|
21
|
+
):
|
22
|
+
"""
|
23
|
+
Verify the fit parameters and throw exceptions if the verification is not successful.
|
24
|
+
|
25
|
+
Parameters
|
26
|
+
----------
|
27
|
+
X : npt.NDArray
|
28
|
+
Training array, containing the samples and their characteristics,
|
29
|
+
[``N samples`` (rows)][``N features`` (columns)].
|
30
|
+
|
31
|
+
Raises
|
32
|
+
------
|
33
|
+
TypeError
|
34
|
+
If X is not an ndarray or list.
|
35
|
+
"""
|
36
|
+
if not isinstance(X, np.ndarray) and not isinstance(X, list):
|
37
|
+
raise TypeError("X is not an ndarray or list.")
|
38
|
+
|
39
|
+
@staticmethod
|
40
|
+
def _check_and_raise_exceptions_predict(
|
41
|
+
X: npt.NDArray,
|
42
|
+
expected: int = 0,
|
43
|
+
feature_type: FeatureType = "continuous-features"
|
44
|
+
) -> None:
|
45
|
+
"""
|
46
|
+
Verify the predict parameters and throw exceptions if the verification is not successful.
|
47
|
+
|
48
|
+
Parameters
|
49
|
+
----------
|
50
|
+
X : npt.NDArray
|
51
|
+
Input array for prediction, containing the samples and their characteristics,
|
52
|
+
[``N samples`` (rows)][``N features`` (columns)].
|
53
|
+
expected : int, default=0
|
54
|
+
Expected number of features per sample (columns in X).
|
55
|
+
feature_type : FeatureType, default="continuous-features"
|
56
|
+
Specifies the type of features: "continuous-features", "binary-features",
|
57
|
+
or "ranged-features".
|
58
|
+
|
59
|
+
|
60
|
+
Raises
|
61
|
+
------
|
62
|
+
TypeError
|
63
|
+
If X is not a ndarray or list.
|
64
|
+
FeatureDimensionMismatch
|
65
|
+
If the number of features in X does not match the expected number.
|
66
|
+
ValueError
|
67
|
+
If feature_type is "binary-features" and X contains values other than 0 and 1.
|
68
|
+
"""
|
69
|
+
if not isinstance(X, (np.ndarray, list)):
|
70
|
+
raise TypeError("X is not an ndarray or list")
|
71
|
+
if expected != len(X[0]):
|
72
|
+
raise FeatureDimensionMismatch(
|
73
|
+
expected,
|
74
|
+
len(X[0]),
|
75
|
+
"X"
|
76
|
+
)
|
77
|
+
|
78
|
+
if feature_type != "binary-features":
|
79
|
+
return
|
80
|
+
|
81
|
+
# Checks if matrix X contains only binary samples. Otherwise, raises an exception.
|
82
|
+
if not np.isin(X, [0, 1]).all():
|
83
|
+
raise ValueError(
|
84
|
+
"The array X contains values that are not composed only of 0 and 1."
|
85
|
+
)
|
86
|
+
|
87
|
+
@staticmethod
|
88
|
+
def _generate_random_antibodies(
|
89
|
+
n_samples: int,
|
90
|
+
n_features: int,
|
91
|
+
feature_type: FeatureType = "continuous-features",
|
92
|
+
bounds: Optional[npt.NDArray[np.float64]] = None
|
93
|
+
) -> npt.NDArray:
|
94
|
+
"""
|
95
|
+
Generate a random antibody population.
|
96
|
+
|
97
|
+
Parameters
|
98
|
+
----------
|
99
|
+
n_samples : int
|
100
|
+
Number of antibodies (samples) to generate.
|
101
|
+
n_features : int
|
102
|
+
Number of features (dimensions) for each antibody.
|
103
|
+
feature_type : FeatureType, default="continuous-features"
|
104
|
+
Specifies the type of features: "continuous-features", "binary-features",
|
105
|
+
or "ranged-features".
|
106
|
+
bounds : np.ndarray
|
107
|
+
Array (n_features, 2) with min and max per dimension.
|
108
|
+
|
109
|
+
Returns
|
110
|
+
-------
|
111
|
+
npt.NDArray
|
112
|
+
Array of shape (n_samples, n_features) containing the generated antibodies.
|
113
|
+
Data type depends on the feature_type type (float for continuous/ranged, bool for
|
114
|
+
binary).
|
115
|
+
"""
|
116
|
+
if n_features <= 0:
|
117
|
+
raise ValueError("Number of features must be greater than zero.")
|
118
|
+
|
119
|
+
if feature_type == "binary-features":
|
120
|
+
return np.random.randint(0, 2, size=(n_samples, n_features)).astype(np.bool_)
|
121
|
+
if feature_type == "ranged-features" and bounds is not None:
|
122
|
+
return np.random.uniform(low=bounds[0], high=bounds[1], size=(n_samples, n_features))
|
123
|
+
|
124
|
+
return np.random.random_sample(size=(n_samples, n_features))
|
aisp/nsa/__init__.py
CHANGED
@@ -5,7 +5,8 @@ distinguish between self and non-self. Only T-cells capable of recognizing non-s
|
|
5
5
|
preserved.
|
6
6
|
"""
|
7
7
|
|
8
|
-
from .
|
8
|
+
from ._binary_negative_selection import BNSA
|
9
|
+
from ._negative_selection import RNSA
|
9
10
|
|
10
11
|
__author__ = "João Paulo da Silva Barros"
|
11
12
|
__all__ = ["RNSA", "BNSA"]
|