aisp 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aisp/__init__.py +1 -1
- aisp/base/__init__.py +3 -1
- aisp/base/_base.py +65 -0
- aisp/base/_classifier.py +3 -15
- aisp/base/_clusterer.py +76 -0
- aisp/csa/__init__.py +1 -1
- aisp/csa/{_ai_immune_recognition_sys.py → _ai_recognition_sys.py} +17 -20
- aisp/ina/__init__.py +14 -0
- aisp/ina/_ai_network.py +553 -0
- aisp/ina/_base.py +124 -0
- aisp/nsa/_negative_selection.py +34 -38
- aisp/utils/distance.py +4 -4
- aisp/utils/validation.py +5 -5
- {aisp-0.2.1.dist-info → aisp-0.3.0.dist-info}/METADATA +6 -5
- aisp-0.3.0.dist-info/RECORD +30 -0
- aisp-0.2.1.dist-info/RECORD +0 -25
- {aisp-0.2.1.dist-info → aisp-0.3.0.dist-info}/WHEEL +0 -0
- {aisp-0.2.1.dist-info → aisp-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {aisp-0.2.1.dist-info → aisp-0.3.0.dist-info}/top_level.txt +0 -0
aisp/__init__.py
CHANGED
aisp/base/__init__.py
CHANGED
aisp/base/_base.py
ADDED
@@ -0,0 +1,65 @@
+"""Base class for parameter introspection compatible with the scikit-learn API."""
+import random
+
+import numpy as np
+from numba import njit
+
+
+class Base:
+    """
+    Generic base class for models with a common interface.
+
+    Provides the ``get_params`` and ``set_params`` method for compatibility with
+    the scikit-learn API, allowing access to the model's public parameters.
+    """
+
+    def set_params(self, **params):
+        """
+        Set the parameters of the instance.
+
+        This method is required to ensure compatibility with scikit-learn functions
+
+        Parameters
+        ----------
+        **params
+            set as attributes on the instance.
+
+        Returns
+        -------
+        self
+        """
+        for key, value in params.items():
+            if not key.startswith("_") and hasattr(self, key):
+                setattr(self, key, value)
+        return self
+
+    def get_params(self, deep: bool = True) -> dict:  # pylint: disable=W0613
+        """
+        Return a dictionary with the object's main parameters.
+
+        This method is required to ensure compatibility with scikit-learn functions.
+
+        Returns
+        -------
+        dict
+            Dictionary containing the object's attributes that do not start with "_".
+        """
+        return {
+            key: value
+            for key, value in self.__dict__.items()
+            if not key.startswith("_")
+        }
+
+
+@njit(cache=True)
+def set_seed_numba(seed: int):
+    """
+    Set the seed for random numbers used by functions compiled with Numba.

+    Parameters
+    ----------
+    seed : int
+        Integer value used to initialize Numba's random number generator.
+    """
+    np.random.seed(seed)
+    random.seed(seed)
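
A minimal sketch of the contract this new mixin establishes. The `Toy` class below is hypothetical (not part of aisp); it only mirrors the two method bodies shown in the diff above:

    class Toy:
        def __init__(self):
            self.N = 50          # public parameter: visible to get_params
            self._cache = None   # private attribute: ignored by both methods

        # same bodies as aisp/base/_base.py above
        def set_params(self, **params):
            for key, value in params.items():
                if not key.startswith("_") and hasattr(self, key):
                    setattr(self, key, value)
            return self

        def get_params(self, deep: bool = True) -> dict:
            return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}


    model = Toy()
    assert model.get_params() == {"N": 50}
    model.set_params(N=100, _cache="x", unknown=1)  # only existing public attributes change
    assert model.N == 100 and model._cache is None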
aisp/base/_classifier.py
CHANGED
@@ -5,15 +5,15 @@ from typing import Optional, Union
 
 import numpy.typing as npt
 
+from ._base import Base
 from ..utils import slice_index_list_by_class
 from ..utils.metrics import accuracy_score
 
 
-class BaseClassifier(ABC):
+class BaseClassifier(ABC, Base):
     """Base class for classification algorithms.
 
-    Defines the abstract methods ``fit`` and ``predict``, and implements the ``score`` and
-    ``get_params`` method.
+    Defines the abstract methods ``fit`` and ``predict``, and implements the ``score`` method.
     """
 
     classes: Union[npt.NDArray, list] = []
@@ -106,15 +106,3 @@ class BaseClassifier(ABC):
             A dictionary with the list of array positions(``y``), with the classes as key.
         """
         return slice_index_list_by_class(self.classes, y)
-
-    def get_params(self, deep: bool = True) -> dict:  # pylint: disable=W0613
-        """
-        Return a dictionary with the object's main parameters.
-
-        This method is required to ensure compatibility with scikit-learn functions.
-        """
-        return {
-            key: value
-            for key, value in self.__dict__.items()
-            if not key.startswith("_")
-        }
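
With ``get_params``/``set_params`` hoisted into ``Base``, every classifier inherits the same parameter interface instead of re-implementing it. A usage sketch (assumes aisp 0.3.0 is installed; parameter values are illustrative):

    from aisp.nsa import RNSA  # a concrete BaseClassifier subclass

    model = RNSA(N=100, r=0.05, seed=42)
    params = model.get_params()   # public attributes only, e.g. params["N"] == 100
    model.set_params(r=0.1)       # updates an existing public parameter in place
    assert model.r == 0.1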
aisp/base/_clusterer.py
ADDED
@@ -0,0 +1,76 @@
+"""Base class for clustering algorithms."""
+
+from abc import ABC, abstractmethod
+from typing import Optional
+
+import numpy.typing as npt
+
+from ._base import Base
+
+
+class BaseClusterer(ABC, Base):
+    """Abstract base class for clustering algorithms.
+
+    This class defines the core interface for clustering models. It enforces
+    the implementation of the `fit` and `predict` methods in all derived classes,
+    and provides a default implementation for `fit_predict` and `get_params`.
+    """
+
+    @abstractmethod
+    def fit(self, X: npt.NDArray, verbose: bool = True) -> "BaseClusterer":
+        """
+        Train the model using the input data X.
+
+        This abstract method is implemented by the class that inherits it.
+
+        Parameters
+        ----------
+        X : npt.NDArray
+            Input data used for training the model.
+        verbose : bool, default=True
+            Flag to enable or disable detailed output during training.
+
+        Returns
+        -------
+        self : BaseClusterer
+            Returns the instance of the class that implements this method.
+        """
+
+    @abstractmethod
+    def predict(self, X: npt.NDArray) -> Optional[npt.NDArray]:
+        """
+        Generate predictions based on the input data X.
+
+        This abstract method is implemented by the class that inherits it.
+
+        Parameters
+        ----------
+        X : npt.NDArray
+            Input data for which predictions will be generated.
+
+        Returns
+        -------
+        predictions : Optional[npt.NDArray]
+            Predicted cluster labels for each input sample, or None if prediction is not possible.
+        """
+
+    def fit_predict(self, X, verbose: bool = True):
+        """Fit the clustering model to the data and return cluster labels.
+
+        This is a convenience method that combines `fit` and `predict`
+        into a single call.
+
+        Parameters
+        ----------
+        X : npt.NDArray
+            Input data for which predictions will be generated.
+        verbose : bool, default=True
+            Flag to enable or disable detailed output during training.
+
+        Returns
+        -------
+        predictions : Optional[npt.NDArray]
+            Predicted cluster labels for each input sample, or None if prediction is not possible.
+        """
+        self.fit(X, verbose)
+        return self.predict(X)
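
To illustrate the interface, a toy subclass wiring ``fit`` and ``predict`` so that the inherited ``fit_predict`` works end to end. ``TwoMeansToy`` is hypothetical, not part of the package, and this assumes ``BaseClusterer`` is exported from ``aisp.base`` (as the ``ina`` module's imports below suggest):

    from typing import Optional

    import numpy as np
    import numpy.typing as npt

    from aisp.base import BaseClusterer


    class TwoMeansToy(BaseClusterer):
        """Assign each sample to the nearer of two fixed centroids."""

        def fit(self, X: npt.NDArray, verbose: bool = True) -> "TwoMeansToy":
            # "train" by taking the min and max rows as centroids -- purely illustrative
            self._centroids = np.vstack([X.min(axis=0), X.max(axis=0)])
            return self

        def predict(self, X: npt.NDArray) -> Optional[npt.NDArray]:
            d = np.linalg.norm(X[:, None, :] - self._centroids[None, :, :], axis=2)
            return d.argmin(axis=1)


    X = np.array([[0.1, 0.2], [0.9, 0.8], [0.2, 0.1]])
    labels = TwoMeansToy().fit_predict(X)  # fit + predict in one call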
aisp/csa/__init__.py
CHANGED
@@ -3,7 +3,7 @@
 CSAs are inspired by the process of antibody proliferation upon detecting an antigen, during which
 the generated antibodies undergo mutations in an attempt to enhance pathogen recognition.
 """
-from ._ai_immune_recognition_sys import AIRS
+from ._ai_recognition_sys import AIRS
 
 __author__ = 'João Paulo da Silva Barros'
 __all__ = ['AIRS']
aisp/csa/{_ai_immune_recognition_sys.py → _ai_recognition_sys.py}
RENAMED
@@ -11,7 +11,7 @@ import numpy.typing as npt
 from scipy.spatial.distance import pdist
 from tqdm import tqdm
 
-
+from ..base import set_seed_numba
 from ._cell import Cell
 from ..utils.sanitizers import sanitize_param, sanitize_seed, sanitize_choice
 from ..utils.distance import hamming, compute_metric_distance, get_metric_code
@@ -178,6 +178,7 @@ class AIRS(BaseAIRS):
         self.seed: Optional[int] = sanitize_seed(seed)
         if self.seed is not None:
             np.random.seed(self.seed)
+            set_seed_numba(self.seed)
 
         self._feature_type: FeatureType = "continuous-features"
 
@@ -219,8 +220,6 @@ class AIRS(BaseAIRS):
         AIRS
             Returns the instance itself.
         """
-        progress = None
-
         self._feature_type = detect_vector_data_type(X)
 
         super()._check_and_raise_exceptions_fit(X, y)
@@ -234,18 +233,17 @@ class AIRS(BaseAIRS):
 
         self.classes = np.unique(y)
         sample_index = self._slice_index_list_by_class(y)
-        if verbose:
-            progress = tqdm(
-                total=len(y),
-                postfix="\n",
-                bar_format="{desc} ┇{bar}┇ {n}/{total} memory cells for each aᵢ",
-            )
+        progress = tqdm(
+            total=len(y),
+            postfix="\n",
+            disable=not verbose,
+            bar_format="{desc} ┇{bar}┇ {n}/{total} memory cells for each aᵢ",
+        )
         pool_cells_classes = {}
         for _class_ in self.classes:
-            if verbose:
-                progress.set_description_str(
-                    f"Generating the memory cells for the {_class_} class:"
-                )
+            progress.set_description_str(
+                f"Generating the memory cells for the {_class_} class:"
+            )
 
             x_class = X[sample_index[_class_]]
             # Calculating the similarity threshold between antigens
@@ -294,15 +292,14 @@ class AIRS(BaseAIRS):
                 if self._affinity(c_candidate.vector, c_match.vector) < sufficiently_similar:
                     pool_c.remove(c_match)
 
-                if verbose:
-                    progress.update(1)
+                progress.update(1)
             pool_cells_classes[_class_] = pool_c
 
-        if verbose:
-            progress.set_description(
-                f"\033[92m✔ Set of memory cells for classes ({', '.join(map(str, self.classes))}) "
-                f"successfully generated\033[0m"
-            )
+        progress.set_description(
+            f"\033[92m✔ Set of memory cells for classes ({', '.join(map(str, self.classes))}) "
+            f"successfully generated\033[0m"
+        )
+        progress.close()
         self._cells_memory = pool_cells_classes
         return self
 
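
The hunks above thread the model seed into Numba's generator as well, so the compiled mutation kernels become reproducible alongside NumPy. A sketch of the intended effect (assumes aisp 0.3.0; the toy data and the reproducibility assertion are illustrative, not verified output):

    import numpy as np

    from aisp.csa import AIRS

    X = np.random.default_rng(0).random((40, 4))
    y = np.array([0] * 20 + [1] * 20)

    preds_a = AIRS(seed=123).fit(X, y, verbose=False).predict(X)
    preds_b = AIRS(seed=123).fit(X, y, verbose=False).predict(X)
    assert np.array_equal(preds_a, preds_b)  # identical seeds, identical memory cells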
aisp/ina/__init__.py
ADDED
@@ -0,0 +1,14 @@
+"""Module (ina) Immune Network Algorithm.
+
+This module implements algorithms based on Network Theory Algorithms proposed by Jerne.
+
+Classes
+-------
+AiNet
+    Artificial Immune Network implementation for clustering.
+"""
+
+from ._ai_network import AiNet
+
+__author__ = 'João Paulo da Silva Barros'
+__all__ = ['AiNet']
aisp/ina/_ai_network.py
ADDED
@@ -0,0 +1,553 @@
+"""Artificial Immune Network (AiNet)."""
+
+from collections import Counter
+from heapq import nlargest
+from typing import Optional
+
+import numpy as np
+import numpy.typing as npt
+from scipy.sparse.csgraph import minimum_spanning_tree, connected_components
+from scipy.spatial.distance import squareform, pdist, cdist
+from tqdm import tqdm
+
+from ._base import BaseAiNet
+from ..base import set_seed_numba
+from ..base.mutation import clone_and_mutate_binary, clone_and_mutate_continuous, \
+    clone_and_mutate_ranged
+from ..utils.sanitizers import sanitize_choice, sanitize_param, sanitize_seed
+from ..utils.distance import hamming, compute_metric_distance, get_metric_code
+from ..utils.types import FeatureType, MetricType
+from ..utils.validation import detect_vector_data_type
+
+
+class AiNet(BaseAiNet):
+    """Artificial Immune Network for Compression and Clustering.
+
+    This class implements the aiNet algorithm, an artificial immune network model designed for
+    clustering and data compression tasks. The aiNet algorithm uses principles from immune
+    network theory, clonal selection, and affinity maturation to compress high-dimensional
+    datasets. [1]_
+    For clustering, the class uses SciPy’s implementation of the **Minimum Spanning Tree**
+    (MST) to remove the most distant nodes and separate the groups. [2]_
+
+    Parameters
+    ----------
+    N : int, default=50
+        Number of memory cells (antibodies) in the population.
+    n_clone : int, default=10
+        Number of clones generated for each selected memory cell.
+    top_clonal_memory_size : Optional[int], default=5
+        Number of highest-affinity antibodies selected per antigen for cloning and mutation.
+        If set to None or 0, all antibodies are cloned, following the original aiNet algorithm.
+    n_diversity_injection : int, default=5
+        Number of new random memory cells injected to maintain diversity.
+    affinity_threshold : float, default=0.5
+        Threshold for affinity (similarity) to determine cell suppression or selection.
+    suppression_threshold : float, default=0.5
+        Threshold for suppressing similar memory cells.
+    mst_inconsistency_factor : float, default=2.0
+        Factor used to determine which edges in the **Minimum Spanning Tree (MST)**
+        are considered inconsistent.
+    max_iterations : int, default=10
+        Maximum number of training iterations.
+    k : int, default=3
+        The number of K nearest neighbors that will be used to choose a label in the prediction.
+    metric : Literal["manhattan", "minkowski", "euclidean"], default="euclidean"
+        Way to calculate the distance between the detector and the sample:
+
+        * ``'euclidean'`` ➜ The calculation of the distance is given by the expression:
+          √((x₁ – y₁)² + (x₂ – y₂)² + ... + (xₙ – yₙ)²).
+
+        * ``'minkowski'`` ➜ The calculation of the distance is given by the expression:
+          (|x₁ – y₁|ᵖ + |x₂ – y₂|ᵖ + ... + |xₙ – yₙ|ᵖ)^(1/p).
+
+        * ``'manhattan'`` ➜ The calculation of the distance is given by the expression:
+          (|x₁ – y₁| + |x₂ – y₂| + ... + |xₙ – yₙ|).
+
+    seed : Optional[int]
+        Seed for the random generation of detector values. Defaults to None.
+    use_mst_clustering : bool, default=True
+        If ``True``, performs clustering with **Minimum Spanning Tree** (MST). If ``False``,
+        does not perform clustering and predict returns None.
+    **kwargs
+        p : float
+            This parameter stores the value of ``p`` used in the Minkowski distance. The default
+            is ``2``, which represents normalized Euclidean distance.
+            Different values of p lead to different variants of the Minkowski distance.
+
+    References
+    ----------
+    .. [1] de Castro, L. N., & Von Zuben, F. J. (2001).
+        *aiNet: An Artificial Immune Network for Data Analysis*.
+        Draft Chapter XII of the book *Data Mining: A Heuristic Approach*.
+        Department of Computer and Automation Engineering, University of Campinas.
+        Available at:
+        https://www.dca.fee.unicamp.br/~vonzuben/research/lnunes_dout/
+        artigos/DMHA.PDF
+    .. [2] SciPy Documentation. *Minimum Spanning Tree*.
+        https://docs.scipy.org/doc/scipy/reference/generated/
+        scipy.sparse.csgraph.minimum_spanning_tree
+    """
+
+    def __init__(
+        self,
+        N: int = 50,
+        n_clone: int = 10,
+        top_clonal_memory_size: int = 5,
+        n_diversity_injection: int = 5,
+        affinity_threshold: float = 0.5,
+        suppression_threshold: float = 0.5,
+        mst_inconsistency_factor: float = 2.0,
+        max_iterations: int = 10,
+        k: int = 3,
+        metric: MetricType = "euclidean",
+        seed: Optional[int] = None,
+        use_mst_clustering: bool = True,
+        **kwargs
+    ):
+        self.N: int = sanitize_param(N, 50, lambda x: x > 0)
+        self.n_clone: int = sanitize_param(n_clone, 10, lambda x: x > 0)
+        if top_clonal_memory_size is None:
+            self.top_clonal_memory_size: Optional[int] = None
+        else:
+            self.top_clonal_memory_size: Optional[int] = sanitize_param(
+                top_clonal_memory_size, 5, lambda x: x > 0
+            )
+
+        self.n_diversity_injection: int = sanitize_param(
+            n_diversity_injection, 5, lambda x: x > 0
+        )
+        self.affinity_threshold: float = sanitize_param(
+            affinity_threshold, 0.5, lambda x: x > 0
+        )
+        self.suppression_threshold: float = sanitize_param(
+            suppression_threshold, 0.5, lambda x: x > 0
+        )
+        self.mst_inconsistency_factor: float = sanitize_param(
+            mst_inconsistency_factor, 2, lambda x: x >= 0
+        )
+        self.max_iterations: int = sanitize_param(max_iterations, 10, lambda x: x > 0)
+        self.k: int = sanitize_param(k, 1, lambda x: x > 0)
+        self.seed: Optional[int] = sanitize_seed(seed)
+        self.use_mst_clustering: bool = use_mst_clustering
+        if self.seed is not None:
+            np.random.seed(self.seed)
+            set_seed_numba(self.seed)
+
+        self._feature_type: FeatureType = "continuous-features"
+        self.metric: str = sanitize_choice(
+            metric, ["euclidean", "manhattan", "minkowski"], "euclidean"
+        )
+        if self._feature_type == "binary-features":
+            self.metric = "hamming"
+
+        self.p: np.float64 = np.float64(kwargs.get("p", 2.0))
+        self._metric_params = {}
+        if self.metric == "minkowski":
+            self._metric_params['p'] = self.p
+        self.classes = []
+        self._memory_network: dict = {}
+        self._population_antibodies: Optional[npt.NDArray] = None
+        self._n_features: int = 0
+        self._bounds: Optional[npt.NDArray[np.float64]] = None
+        self._mst_structure: Optional[npt.NDArray] = None
+        self._mst_mean_distance: Optional[float] = None
+        self._mst_std_distance: Optional[float] = None
+        self._predict_cells = None
+        self._predict_labels = None
+
+    @property
+    def memory_network(self) -> dict:
+        """Return the immune network representing clusters or graph structure."""
+        return self._memory_network
+
+    @property
+    def population_antibodies(self) -> Optional[npt.NDArray]:
+        """Return the set of memory antibodies."""
+        return self._population_antibodies
+
+    @property
+    def mst(self) -> dict:
+        """Return the Minimum Spanning Tree and its statistics."""
+        return {
+            'graph': self._mst_structure,
+            'mean_distance': self._mst_mean_distance,
+            'std_distance': self._mst_std_distance
+        }
+
+    def fit(self, X: npt.NDArray, verbose: bool = True):
+        """
+        Train the AiNet model on input data.
+
+        Parameters
+        ----------
+        X : npt.NDArray
+            Input data used for training the model.
+        verbose : bool, default=True
+            Feedback from the progress bar showing current training iteration details.
+
+        Returns
+        -------
+        self : AiNet
+            Returns the instance of the class that implements this method.
+        """
+        self._feature_type = detect_vector_data_type(X)
+
+        super()._check_and_raise_exceptions_fit(X)
+
+        match self._feature_type:
+            case "binary-features":
+                X = X.astype(np.bool_)
+                self.metric = "hamming"
+            case "ranged-features":
+                self._bounds = np.vstack([np.min(X, axis=0), np.max(X, axis=0)])
+
+        self._n_features = X.shape[1]
+
+        progress = tqdm(
+            total=self.max_iterations,
+            postfix="\n",
+            disable=not verbose,
+            bar_format="{desc} ┇{bar}┇ {n}/{total} total training interactions",
+        )
+
+        population_p = self._init_population_antibodies()
+
+        t: int = 1
+        while t <= self.max_iterations:
+            pool_memory = []
+            permutations = np.random.permutation(X.shape[0])
+            for antigen in X[permutations]:
+                clonal_memory = self._select_and_clone_population(antigen, population_p)
+                pool_memory.extend(self._clonal_suppression(antigen, clonal_memory))
+            pool_memory = self._memory_suppression(pool_memory)
+
+            if t < self.max_iterations:
+                pool_memory.extend(self._diversity_introduction())
+            population_p = np.asarray(pool_memory)
+
+            progress.update(1)
+
+            t += 1
+        self._population_antibodies = population_p
+
+        if self.use_mst_clustering:
+            self._build_mst()
+            self.update_clusters()
+        progress.set_description(
+            f"\033[92m✔ Set of memory antibodies for classes "
+            f"({', '.join(map(str, self.classes))}) successfully generated | "
+            f"Clusters: {len(self.classes)} | Population of antibodies size: "
+            f"{len(self._population_antibodies)}\033[0m"
+        )
+        progress.close()
+
+        return self
+
+    def predict(self, X) -> Optional[npt.NDArray]:
+        """
+        Predict cluster labels for input data.
+
+        Parameters
+        ----------
+        X : npt.NDArray
+            Data to predict.
+
+        Returns
+        -------
+        predictions : Optional[npt.NDArray]
+            Predicted cluster labels, or None if clustering is disabled.
+        """
+        if not self.use_mst_clustering or self._memory_network is None:
+            return None
+
+        super()._check_and_raise_exceptions_predict(
+            X, self._n_features, self._feature_type
+        )
+
+        c: list = []
+
+        all_cells_memory = [
+            (class_name, cell)
+            for class_name in self.classes
+            for cell in self._memory_network[class_name]
+        ]
+
+        for line in X:
+            label_stim_list = [
+                (class_name, self._affinity(memory, line))
+                for class_name, memory in all_cells_memory
+            ]
+            # Create the list with the k nearest neighbors and select the class with the most votes
+            k_nearest = nlargest(self.k, label_stim_list, key=lambda x: x[1])
+            votes = Counter(label for label, _ in k_nearest)
+            c.append(votes.most_common(1)[0][0])
+        return np.array(c)
+
+    def _init_population_antibodies(self) -> npt.NDArray:
+        """
+        Initialize the antibody set of the network population randomly.
+
+        Returns
+        -------
+        npt.NDArray
+            List of initialized memories.
+        """
+        return self._generate_random_antibodies(
+            self.N,
+            self._n_features,
+            self._feature_type,
+            self._bounds
+        )
+
+    def _select_and_clone_population(
+        self,
+        antigen: npt.NDArray,
+        population: npt.NDArray
+    ) -> list:
+        """
+        Select top antibodies by affinity and generate mutated clones.
+
+        Parameters
+        ----------
+        antigen : npt.NDArray
+            The antigen for which affinities will be calculated.
+        population: list
+            The list of antibodies (solutions) to be evaluated and cloned.
+
+        Returns
+        -------
+        list[npt.NDArray]
+            List of mutated clones.
+        """
+        affinities = self._calculate_affinities(antigen, population)
+
+        if self.top_clonal_memory_size is not None and self.top_clonal_memory_size > 0:
+            selected_idxs = np.argsort(-affinities)[:self.top_clonal_memory_size]
+        else:
+            selected_idxs = np.arange(affinities.shape[0])
+
+        clonal_m = []
+        for i in selected_idxs:
+            clones = self._clone_and_mutate(
+                population[i],
+                int(self.n_clone * affinities[i])
+            )
+            clonal_m.extend(clones)
+
+        return clonal_m
+
+    def _clonal_suppression(self, antigen: npt.NDArray, clones: list):
+        """
+        Suppress redundant clones based on affinity thresholds.
+
+        This function removes clones whose affinity with the antigen is lower than the defined
+        threshold (affinity_threshold) and eliminates redundant clones whose similarity with the
+        clones already selected exceeds the suppression threshold (suppression_threshold).
+
+        Parameters
+        ----------
+        antigen : npt.NDArray
+            The antigen for which affinities will be calculated.
+        clones : list
+            The list of candidate clones to be suppressed.
+
+        Returns
+        -------
+        list
+            Non-redundant, high-affinity clones.
+        """
+        suppression_affinity = [
+            clone for clone in clones
+            if self._affinity(clone, antigen) > self.affinity_threshold
+        ]
+        return self._memory_suppression(suppression_affinity)
+
+    def _memory_suppression(self, pool_memory: list) -> list:
+        """
+        Remove redundant antibodies from memory pool.
+
+        Calculate the affinity between all memory antibodies and remove redundant antibodies
+        whose similarity exceeds the suppression threshold.
+
+        Parameters
+        ----------
+        pool_memory : list
+            antibodies memory.
+
+        Returns
+        -------
+        list
+            Memory pool without redundant antibodies.
+        """
+        if not pool_memory:
+            return []
+        suppressed_memory = [pool_memory[0]]
+        for candidate in pool_memory[1:]:
+            affinities = self._calculate_affinities(
+                candidate.reshape(1, -1),
+                np.asarray(suppressed_memory)
+            )
+
+            if not np.any(affinities > self.suppression_threshold):
+                suppressed_memory.append(candidate)
+        return suppressed_memory
+
+    def _diversity_introduction(self):
+        """
+        Introduce diversity into the antibody population.
+
+        Returns
+        -------
+        npt.NDArray
+            Array of new random antibodies for diversity introduction.
+        """
+        return self._generate_random_antibodies(
+            self.n_diversity_injection,
+            self._n_features,
+            self._feature_type,
+            self._bounds
+        )
+
+    def _affinity(self, u: npt.NDArray, v: npt.NDArray) -> float:
+        """
+        Calculate the stimulus between two vectors using metrics.
+
+        Parameters
+        ----------
+        u : npt.NDArray
+            Coordinates of the first point.
+        v : npt.NDArray
+            Coordinates of the second point.
+
+        Returns
+        -------
+        float
+            Affinity score in [0, 1], where higher means more similar.
+        """
+        distance: float
+        if self._feature_type == "binary-features":
+            distance = hamming(u, v)
+        else:
+            distance = compute_metric_distance(
+                u, v, get_metric_code(self.metric), self.p
+            )
+
+        return 1 - (distance / (1 + distance))
+
+    def _calculate_affinities(self, u: npt.NDArray, v: npt.NDArray) -> npt.NDArray:
+        """
+        Calculate the affinity matrix between a reference vector and a set of target vectors.
+
+        Parameters
+        ----------
+        u : npt.NDArray
+            An array with shape (n_features).
+        v : npt.NDArray
+            An array of vectors with shape (n_samples, n_features).
+
+        Returns
+        -------
+        npt.NDArray
+            One-dimensional array of shape (n_samples,), containing the affinities between `u`
+            and each vector in `v`.
+        """
+        u = np.reshape(u, (1, -1))
+        v = np.atleast_2d(v)
+        distances = cdist(u, v, metric=self.metric, **self._metric_params)[0]
+
+        return 1 - (distances / (1 + distances))
+
+    def _clone_and_mutate(self, antibody: npt.NDArray, n_clone: int) -> npt.NDArray:
+        """
+        Generate mutated clones from an antibody, based on the feature type.
+
+        Parameters
+        ----------
+        antibody : npt.NDArray
+            Original antibody vector to be cloned and mutated.
+        n_clone : int
+            Number of clones to generate.
+
+        Returns
+        -------
+        npt.NDArray
+            Array of shape (n_clone, len(antibody)) containing mutated clones.
+        """
+        if self._feature_type == "binary-features":
+            return clone_and_mutate_binary(antibody, n_clone)
+        if self._feature_type == "ranged-features" and self._bounds is not None:
+            return clone_and_mutate_ranged(antibody, n_clone, self._bounds)
+        return clone_and_mutate_continuous(antibody, n_clone)
+
+    def _build_mst(self):
+        """Construct the Minimum Spanning Tree (MST) for the antibody population.
+
+        Computes the pairwise distances between antibodies, builds the MST from
+        these distances, and stores the MST structure along with the mean and
+        standard deviation of its edge weights.
+
+        Raises
+        ------
+        ValueError
+            If the antibody population is empty.
+        """
+        if self._population_antibodies is None or len(self._population_antibodies) == 0:
+            raise ValueError("Population of antibodies is empty")
+
+        antibodies_matrix = squareform(
+            pdist(self._population_antibodies, metric=self.metric, **self._metric_params)
+        )
+        antibodies_mst = minimum_spanning_tree(antibodies_matrix).toarray()
+        self._mst_structure = antibodies_mst
+        nonzero_edges = antibodies_mst[antibodies_mst > 0]
+        self._mst_mean_distance = float(np.mean(nonzero_edges)) if nonzero_edges.size else 0.0
+        self._mst_std_distance = float(np.std(nonzero_edges)) if nonzero_edges.size else 0.0
+
+    def update_clusters(self, mst_inconsistency_factor: Optional[float] = None):
+        """Partition the clusters based on the MST inconsistency factor.
+
+        Uses the precomputed Minimum Spanning Tree (MST) of the antibody population
+        to redefine clusters. Edges whose weights exceed the mean plus the
+        `mst_inconsistency_factor` multiplied by the standard deviation of MST edge
+        weights are removed. Each connected component after pruning is treated as a
+        distinct cluster.
+
+        Parameters
+        ----------
+        mst_inconsistency_factor : float, optional
+            If provided, overrides the current inconsistency factor.
+
+        Raises
+        ------
+        ValueError
+            If the Minimum Spanning Tree (MST) has not yet been created.
+
+        Updates
+        -------
+        self._memory_network : dict[int, npt.NDArray]
+            Dictionary mapping cluster labels to antibody arrays.
+        self.classes : list
+            List of cluster labels.
+        """
+        if self._mst_structure is None:
+            raise ValueError("The Minimum Spanning Tree (MST) has not yet been created.")
+
+        if mst_inconsistency_factor is not None:
+            self.mst_inconsistency_factor = mst_inconsistency_factor
+
+        antibodies_mst = self._mst_structure.copy()
+
+        thresholds = antibodies_mst > (
+            self._mst_mean_distance + self.mst_inconsistency_factor * self._mst_std_distance
+        )
+        antibodies_mst[thresholds] = 0
+
+        n_antibodies, labels = connected_components(csgraph=antibodies_mst, directed=False)
+
+        self._memory_network = {
+            label: self._population_antibodies[labels == label]
+            for label in range(n_antibodies)
+        }
+        self.classes = np.array(list(self._memory_network.keys()))
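
A usage sketch for the class added above (assumes aisp 0.3.0; the data and parameter values are illustrative). Note the affinity used throughout is 1 - d/(1 + d), which maps distance d = 0 to 1.0 and decays toward 0 as d grows:

    import numpy as np

    from aisp.ina import AiNet

    rng = np.random.default_rng(42)
    # two well-separated blobs in [0, 1]^2
    X = np.vstack([
        rng.normal(0.2, 0.03, size=(30, 2)),
        rng.normal(0.8, 0.03, size=(30, 2)),
    ])

    model = AiNet(N=40, max_iterations=5, seed=42, use_mst_clustering=True)
    labels = model.fit_predict(X, verbose=False)   # inherited from BaseClusterer

    print(len(model.classes))            # number of clusters found by MST pruning
    print(model.mst["mean_distance"])    # MST edge statistics
    # re-partition with a stricter inconsistency factor, without re-training
    model.update_clusters(mst_inconsistency_factor=1.0)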
aisp/ina/_base.py
ADDED
@@ -0,0 +1,124 @@
+"""Base Class for Network Theory Algorithms."""
+
+from abc import ABC
+from typing import Optional
+
+from numpy import typing as npt
+
+import numpy as np
+
+from ..base import BaseClusterer
+from ..exceptions import FeatureDimensionMismatch
+from ..utils.types import FeatureType
+
+
+class BaseAiNet(BaseClusterer, ABC):
+    """Abstract base class for AINet-based clustering algorithms."""
+
+    @staticmethod
+    def _check_and_raise_exceptions_fit(
+        X: npt.NDArray
+    ):
+        """
+        Verify the fit parameters and throw exceptions if the verification is not successful.
+
+        Parameters
+        ----------
+        X : npt.NDArray
+            Training array, containing the samples and their characteristics,
+            [``N samples`` (rows)][``N features`` (columns)].
+
+        Raises
+        ------
+        TypeError
+            If X is not an ndarray or list.
+        """
+        if not isinstance(X, np.ndarray) and not isinstance(X, list):
+            raise TypeError("X is not an ndarray or list.")
+
+    @staticmethod
+    def _check_and_raise_exceptions_predict(
+        X: npt.NDArray,
+        expected: int = 0,
+        feature_type: FeatureType = "continuous-features"
+    ) -> None:
+        """
+        Verify the predict parameters and throw exceptions if the verification is not successful.
+
+        Parameters
+        ----------
+        X : npt.NDArray
+            Input array for prediction, containing the samples and their characteristics,
+            [``N samples`` (rows)][``N features`` (columns)].
+        expected : int, default=0
+            Expected number of features per sample (columns in X).
+        feature_type : FeatureType, default="continuous-features"
+            Specifies the type of features: "continuous-features", "binary-features",
+            or "ranged-features".
+
+        Raises
+        ------
+        TypeError
+            If X is not a ndarray or list.
+        FeatureDimensionMismatch
+            If the number of features in X does not match the expected number.
+        ValueError
+            If feature_type is "binary-features" and X contains values other than 0 and 1.
+        """
+        if not isinstance(X, (np.ndarray, list)):
+            raise TypeError("X is not an ndarray or list")
+        if expected != len(X[0]):
+            raise FeatureDimensionMismatch(
+                expected,
+                len(X[0]),
+                "X"
+            )
+
+        if feature_type != "binary-features":
+            return
+
+        # Checks if matrix X contains only binary samples. Otherwise, raises an exception.
+        if not np.isin(X, [0, 1]).all():
+            raise ValueError(
+                "The array X contains values that are not composed only of 0 and 1."
+            )
+
+    @staticmethod
+    def _generate_random_antibodies(
+        n_samples: int,
+        n_features: int,
+        feature_type: FeatureType = "continuous-features",
+        bounds: Optional[npt.NDArray[np.float64]] = None
+    ) -> npt.NDArray:
+        """
+        Generate a random antibody population.
+
+        Parameters
+        ----------
+        n_samples : int
+            Number of antibodies (samples) to generate.
+        n_features : int
+            Number of features (dimensions) for each antibody.
+        feature_type : FeatureType, default="continuous-features"
+            Specifies the type of features: "continuous-features", "binary-features",
+            or "ranged-features".
+        bounds : np.ndarray
+            Array (n_features, 2) with min and max per dimension.
+
+        Returns
+        -------
+        npt.NDArray
+            Array of shape (n_samples, n_features) containing the generated antibodies.
+            Data type depends on the feature_type type (float for continuous/ranged, bool for
+            binary).
+        """
+        if n_features <= 0:
+            raise ValueError("Number of features must be greater than zero.")
+
+        if feature_type == "binary-features":
+            return np.random.randint(0, 2, size=(n_samples, n_features)).astype(np.bool_)
+        if feature_type == "ranged-features" and bounds is not None:
+            return np.random.uniform(low=bounds[0], high=bounds[1], size=(n_samples, n_features))
+
+        return np.random.random_sample(size=(n_samples, n_features))
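
The three generation branches above reduce to plain NumPy calls. A standalone sketch that mirrors them without importing the private static method (the `bounds` layout here follows how `AiNet.fit` builds it: row 0 holds the per-feature minima, row 1 the maxima):

    import numpy as np

    n_samples, n_features = 4, 3

    # binary-features: random booleans
    binary = np.random.randint(0, 2, size=(n_samples, n_features)).astype(np.bool_)

    # ranged-features: uniform inside per-feature bounds
    bounds = np.array([[0.0, -1.0, 10.0],    # minima
                       [1.0, 1.0, 20.0]])    # maxima
    ranged = np.random.uniform(low=bounds[0], high=bounds[1], size=(n_samples, n_features))

    # continuous-features: uniform in [0, 1)
    continuous = np.random.random_sample(size=(n_samples, n_features))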
aisp/nsa/_negative_selection.py
CHANGED
@@ -6,6 +6,7 @@ from tqdm import tqdm
 import numpy as np
 import numpy.typing as npt
 
+from ..base import set_seed_numba
 from ._ns_core import (
     check_detector_bnsa_validity,
     bnsa_class_prediction,
@@ -98,6 +99,7 @@ class RNSA(BaseNSA):
         self.seed: Optional[int] = sanitize_seed(seed)
         if self.seed is not None:
             np.random.seed(seed)
+            set_seed_numba(self.seed)
         self.k: int = sanitize_param(k, 1, lambda x: x > 1)
         self.N: int = sanitize_param(N, 100, lambda x: x >= 1)
         self.r: float = sanitize_param(r, 0.05, lambda x: x > 0)
@@ -148,7 +150,6 @@ class RNSA(BaseNSA):
         self : RNSA
             Returns the instance itself.
         """
-        progress = None
         super()._check_and_raise_exceptions_fit(X, y)
 
         # Identifying the possible classes within the output array `y`.
@@ -158,22 +159,21 @@ class RNSA(BaseNSA):
         # Separates the classes for training.
         sample_index = self._slice_index_list_by_class(y)
         # Progress bar for generating all detectors.
-        if verbose:
-            progress = tqdm(
-                total=int(self.N * (len(self.classes))),
-                bar_format="{desc} ┇{bar}┇ {n}/{total} detectors",
-                postfix="\n",
-            )
+        progress = tqdm(
+            total=int(self.N * (len(self.classes))),
+            bar_format="{desc} ┇{bar}┇ {n}/{total} detectors",
+            postfix="\n",
+            disable=not verbose
+        )
         for _class_ in self.classes:
            # Initializes the empty set that will contain the valid detectors.
             valid_detectors_set = []
             discard_count = 0
             x_class = X[sample_index[_class_]]
             # Indicating which class the algorithm is currently processing for the progress bar.
-            if verbose:
-                progress.set_description_str(
-                    f"Generating the detectors for the {_class_} class:"
-                )
+            progress.set_description_str(
+                f"Generating the detectors for the {_class_} class:"
+            )
             while len(valid_detectors_set) < self.N:
                 # Generates a candidate detector vector randomly with values between 0 and 1.
                 vector_x = np.random.random_sample(size=X.shape[1])
@@ -188,8 +188,7 @@ class RNSA(BaseNSA):
                     else:
                         radius = None
                     valid_detectors_set.append(Detector(vector_x, radius))
-                    if verbose:
-                        progress.update(1)
+                    progress.update(1)
                 else:
                     discard_count += 1
                     if discard_count == self.max_discards:
@@ -198,11 +197,11 @@ class RNSA(BaseNSA):
             # Add detectors, with classes as keys in the dictionary.
             list_detectors_by_class[_class_] = valid_detectors_set
         # Notify completion of detector generation for the classes.
-        if verbose:
-            progress.set_description(
-                f"\033[92m✔ Non-self detectors for classes ({', '.join(map(str, self.classes))}) "
-                f"successfully generated\033[0m"
-            )
+        progress.set_description(
+            f"\033[92m✔ Non-self detectors for classes ({', '.join(map(str, self.classes))}) "
+            f"successfully generated\033[0m"
+        )
+        progress.close()
         # Saves the found detectors in the attribute for the non-self detectors of the trained model
         self._detectors = list_detectors_by_class
         return self
@@ -347,7 +346,6 @@ class RNSA(BaseNSA):
             knn[self.k - 1] = distance
             knn.sort()
 
-
     def __compare_sample_to_detectors(self, line: npt.NDArray) -> Optional[str]:
         """
         Compare a sample with the detectors, verifying if the sample is proper.
@@ -448,7 +446,7 @@ class RNSA(BaseNSA):
         if (p - new_detector_r) < 0 or (p + new_detector_r) > 1:
             return False
 
-        return
+        return True, new_detector_r
 
 
 class BNSA(BaseNSA):
@@ -534,7 +532,6 @@ class BNSA(BaseNSA):
             Returns the instance it self.
         """
         super()._check_and_raise_exceptions_fit(X, y, "BNSA")
-        progress = None
         # Converts the entire array X to boolean
         X = X.astype(np.bool_)
 
@@ -545,22 +542,22 @@ class BNSA(BaseNSA):
         # Separates the classes for training.
         sample_index: dict = self._slice_index_list_by_class(y)
         # Progress bar for generating all detectors.
-        if verbose:
-            progress = tqdm(
-                total=int(self.N * (len(self.classes))),
-                bar_format="{desc} ┇{bar}┇ {n}/{total} detectors",
-                postfix="\n",
-            )
+
+        progress = tqdm(
+            total=int(self.N * (len(self.classes))),
+            bar_format="{desc} ┇{bar}┇ {n}/{total} detectors",
+            postfix="\n",
+            disable=not verbose
+        )
 
         for _class_ in self.classes:
             # Initializes the empty set that will contain the valid detectors.
             valid_detectors_set: list = []
             discard_count: int = 0
             # Updating the progress bar with the current class the algorithm is processing.
-            if verbose:
-                progress.set_description_str(
-                    f"Generating the detectors for the {_class_} class:"
-                )
+            progress.set_description_str(
+                f"Generating the detectors for the {_class_} class:"
+            )
             x_class = X[sample_index[_class_]]
             while len(valid_detectors_set) < self.N:
                 # Generates a candidate detector vector randomly with values 0 and 1.
@@ -569,8 +566,7 @@ class BNSA(BaseNSA):
                 if check_detector_bnsa_validity(x_class, vector_x, self.aff_thresh):
                     discard_count = 0
                     valid_detectors_set.append(vector_x)
-                    if verbose:
-                        progress.update(1)
+                    progress.update(1)
                 else:
                     discard_count += 1
                     if discard_count == self.max_discards:
@@ -580,11 +576,11 @@ class BNSA(BaseNSA):
             list_detectors_by_class[_class_] = np.array(valid_detectors_set)
 
         # Notify the completion of detector generation for the classes.
-        if verbose:
-            progress.set_description(
-                f"\033[92m✔ Non-self detectors for classes ({', '.join(map(str, self.classes))}) "
-                f"successfully generated\033[0m"
-            )
+        progress.set_description(
+            f"\033[92m✔ Non-self detectors for classes ({', '.join(map(str, self.classes))}) "
+            f"successfully generated\033[0m"
+        )
+        progress.close()
         # Saves the found detectors in the attribute for the class detectors.
         self._detectors = list_detectors_by_class
         self._detectors_stack = np.array(
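
As with AIRS, the seed now reaches the Numba-compiled helpers, so detector generation should be reproducible end to end. A sketch (assumes aisp 0.3.0; toy data, and the assertion describes the intended behavior rather than verified output):

    import numpy as np

    from aisp.nsa import RNSA

    X = np.random.default_rng(7).random((40, 5))
    y = np.array(["self"] * 20 + ["non-self"] * 20)

    a = RNSA(N=50, seed=99).fit(X, y, verbose=False).predict(X)
    b = RNSA(N=50, seed=99).fit(X, y, verbose=False).predict(X)
    assert np.array_equal(a, b)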
aisp/utils/distance.py
CHANGED
@@ -5,10 +5,10 @@ import numpy.typing as npt
 from numba import njit, types
 from numpy import float64
 
-EUCLIDEAN = 0
-MANHATTAN = 1
-MINKOWSKI = 2
-HAMMING = 3
+EUCLIDEAN: int = 0
+MANHATTAN: int = 1
+MINKOWSKI: int = 2
+HAMMING: int = 3
 
 
 @njit([(types.boolean[:], types.boolean[:])], cache=True)
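
These constants are the integer codes returned by get_metric_code and consumed by the Numba distance kernel, as the AiNet code above does. A small sketch of that call pattern (assumes aisp 0.3.0):

    import numpy as np

    from aisp.utils.distance import compute_metric_distance, get_metric_code

    u = np.array([0.0, 0.0], dtype=np.float64)
    v = np.array([3.0, 4.0], dtype=np.float64)

    # same call pattern as AiNet._affinity above
    d = compute_metric_distance(u, v, get_metric_code("euclidean"), np.float64(2.0))
    # d == 5.0 for the 3-4-5 triangle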
aisp/utils/validation.py
CHANGED
@@ -14,9 +14,9 @@ def detect_vector_data_type(
     Detect the type of data in a vector.
 
     The function detects if the vector contains data of type:
-    -
-    -
-    -
+    - Binary features: boolean values or integers restricted to 0 and 1.
+    - Continuous features: floating-point values in the normalized range [0.0, 1.0].
+    - Ranged features: floating-point values outside the normalized range.
 
     Parameters
    ----------
@@ -25,8 +25,8 @@ def detect_vector_data_type(
 
     Returns
     -------
-
-    The
+    str
+        The data type of the vector: "binary-features", "continuous-features", or "ranged-features".
 
     Raises
     ------
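
The corrected docstring matches the three possible return values. A quick sketch (assumes aisp 0.3.0; the expected strings follow the docstring above):

    import numpy as np

    from aisp.utils.validation import detect_vector_data_type

    detect_vector_data_type(np.array([True, False, True]))   # -> "binary-features"
    detect_vector_data_type(np.array([0.2, 0.7, 0.5]))       # -> "continuous-features"
    detect_vector_data_type(np.array([10.5, -3.2, 99.0]))    # -> "ranged-features"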
{aisp-0.2.1.dist-info → aisp-0.3.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aisp
-Version: 0.2.1
+Version: 0.3.0
 Summary: Package with techniques of artificial immune systems.
 Author-email: João Paulo da Silva Barros <jpsilvabarr@gmail.com>
 Maintainer-email: Alison Zille Lopes <alisonzille@gmail.com>
@@ -84,10 +84,11 @@ Artificial Immune Systems (AIS) are inspired by the vertebrate immune system, cr
 ##### Algorithms implemented:
 
 > - [x] [**Negative Selection.**](https://ais-package.github.io/docs/aisp-techniques/Negative%20Selection/)
-> - [x] **Clonal Selection Algorithms.**
->    * [AIRS - Artificial Immune Recognition System](https://ais-package.github.io/docs/aisp-techniques/Clonal%20Selection%20Algorithms/)
-> - [ ] *Danger Theory.*
-> - [ ] *Immune Network Theory.*
+> - [x] [**Clonal Selection Algorithms.**](https://ais-package.github.io/docs/aisp-techniques/Clonal%20Selection%20Algorithms/)
+>    * [AIRS - Artificial Immune Recognition System](https://ais-package.github.io/docs/aisp-techniques/Clonal%20Selection%20Algorithms/airs/)
+> - [ ] *Danger Theory.*
+> - [x] [*Immune Network Theory.*](https://ais-package.github.io/docs/aisp-techniques/Immune%20Network%20Theory)
+>    - [AiNet - Artificial Immune Network for Clustering and Compression](https://ais-package.github.io/docs/aisp-techniques/Immune%20Network%20Theory/ainet)
 
 </section>
 
aisp-0.3.0.dist-info/RECORD
ADDED
@@ -0,0 +1,30 @@
+aisp/__init__.py,sha256=GUbFSuDKAfnn80xxK4giPZxuAakRl5dHC5gdp_WTJU0,111
+aisp/exceptions.py,sha256=I9JaQx6p8Jo7qjwwcrqnuewQgyBdUnOSSZofPoBeDNE,1954
+aisp/base/__init__.py,sha256=cDDN6YcYqSU080AvEankhUtIALkSTVmm6XTa48htHjU,211
+aisp/base/_base.py,sha256=uTVh__hQJGe8RCOzCet4ZV3vQbwgj5fXAt4Jdf0x1r0,1792
+aisp/base/_classifier.py,sha256=yGxRGhJxN8jIpr6S7_fsaEOCMPFws8rNCF4E-hYs37E,3339
+aisp/base/_clusterer.py,sha256=VKwhX8oHMc7ylsSu2jnbw3uar3GHc2AMSNPMEmwrPo0,2432
+aisp/base/mutation.py,sha256=A_AlGp8S4ooFEMW3Jgv0n0Y6tbhfusaMMWFsoH4HmD8,4762
+aisp/csa/__init__.py,sha256=708jwpqia10bqmh-4-_srwwNuBh7jf2Zix-u8Hfbzmk,348
+aisp/csa/_ai_recognition_sys.py,sha256=_niEark6HNsu9ognXussura16KeCw4mi3xU4Xm18hQo,18760
+aisp/csa/_base.py,sha256=jR1IIhGINn7DLo8V5iJinDn-wW-t6etcE39bAZnQylw,3595
+aisp/csa/_cell.py,sha256=GUxnzvPyIbBm1YYkMhSx0tcV_oyDhJ7wAo5gtr_1CoY,1845
+aisp/ina/__init__.py,sha256=cOnxGcxrBdg6lLv2w2sdlToMahKMh_Gw57AfUUPQjMo,329
+aisp/ina/_ai_network.py,sha256=aNXNWdFvgmjqki7-lWApLZWq5w1OVUuZpgxsnluiqNE,21053
+aisp/ina/_base.py,sha256=x9eFUKiAcXSfwqVyBVmS54FDeIcApEtFPGruZvwQOwQ,4404
+aisp/nsa/__init__.py,sha256=3cXuBmO-_Dp3-8ZG3Eu8e_bD1JDb-RH4Wu0UDNVD1bs,385
+aisp/nsa/_base.py,sha256=3YKlZzA3yhP2uQHfhyKswbHUutlxkOR4wn6N10nSO-w,4119
+aisp/nsa/_negative_selection.py,sha256=4FA0fwGVHpSsParsUUdNnnv0FYtJS4_olZBWWiPODk8,28153
+aisp/nsa/_ns_core.py,sha256=SXkZL-p2VQygU4Pf6J5AP_yPzU4cR6aU6wx-e_vlm-c,5021
+aisp/utils/__init__.py,sha256=RzpKhkg8nCZi4G0C4il97f3ESYs7Bbxq6EjTeOQQUGk,195
+aisp/utils/_multiclass.py,sha256=nWd58ayVfxgdopBQc9b_xywkolJ2fGW3AN-JoD2A9Fw,1134
+aisp/utils/distance.py,sha256=pY23YGZpu6qVCCkZfhaEpRazUULfVUy2piyzYuAryN0,6576
+aisp/utils/metrics.py,sha256=zDAScDbHRnfu24alRcZ6fEIUaWNoCD-QCtOCFBWPPo8,1277
+aisp/utils/sanitizers.py,sha256=u1GizdJ-RKfPWJLnuFiM09lpItZMhDR_EvK8YdVHwDk,1858
+aisp/utils/types.py,sha256=KELzr1kSBT7hHdsABoIS1xmEBGj6gRSH5A5YNG36I_c,1324
+aisp/utils/validation.py,sha256=RqcS2VdFXkNcOH_7Y3yPi7FBoGWR_ReLBPDBx0UMCqI,1431
+aisp-0.3.0.dist-info/licenses/LICENSE,sha256=fTqV5eBpeAZO0_jit8j4Ref9ikBSlHJ8xwj5TLg7gFk,7817
+aisp-0.3.0.dist-info/METADATA,sha256=hYtREi4OT36M5B0LgIKnnrzdQ1SZKwzYNdpIT-O7Hwg,5173
+aisp-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+aisp-0.3.0.dist-info/top_level.txt,sha256=Q5aJi_rAVT5UNS1As0ZafoyS5dwNibnoyOYV7RWUB9s,5
+aisp-0.3.0.dist-info/RECORD,,
aisp-0.2.1.dist-info/RECORD
DELETED
@@ -1,25 +0,0 @@
-aisp/__init__.py,sha256=N5aAyup46_tqU9cXfYfGuR3bdfAjcvaPc1xwFdGdD7A,112
-aisp/exceptions.py,sha256=I9JaQx6p8Jo7qjwwcrqnuewQgyBdUnOSSZofPoBeDNE,1954
-aisp/base/__init__.py,sha256=k2Ww9hej_32ekYhhCiYGEMLgOmDKwRt261HZ8rEurwA,102
-aisp/base/_classifier.py,sha256=Ud8VLE7vNh1ddpNNg0RVET2RXCd7kvzvfvNKHKNn_GM,3734
-aisp/base/mutation.py,sha256=A_AlGp8S4ooFEMW3Jgv0n0Y6tbhfusaMMWFsoH4HmD8,4762
-aisp/csa/__init__.py,sha256=cJSKkbvNTpR_CKCL--h99fNPiMf3fJ73gFnZRq7uyVM,355
-aisp/csa/_ai_immune_recognition_sys.py,sha256=_XqTHjqEO6sGZiIRlNNLe6Lz2PDFfDCtsbpucClvYmA,18878
-aisp/csa/_base.py,sha256=jR1IIhGINn7DLo8V5iJinDn-wW-t6etcE39bAZnQylw,3595
-aisp/csa/_cell.py,sha256=GUxnzvPyIbBm1YYkMhSx0tcV_oyDhJ7wAo5gtr_1CoY,1845
-aisp/nsa/__init__.py,sha256=3cXuBmO-_Dp3-8ZG3Eu8e_bD1JDb-RH4Wu0UDNVD1bs,385
-aisp/nsa/_base.py,sha256=3YKlZzA3yhP2uQHfhyKswbHUutlxkOR4wn6N10nSO-w,4119
-aisp/nsa/_negative_selection.py,sha256=aMdbIrd4TdPxaAkHHY-HbbM5kd5f81HbE3DyB73ttX4,28467
-aisp/nsa/_ns_core.py,sha256=SXkZL-p2VQygU4Pf6J5AP_yPzU4cR6aU6wx-e_vlm-c,5021
-aisp/utils/__init__.py,sha256=RzpKhkg8nCZi4G0C4il97f3ESYs7Bbxq6EjTeOQQUGk,195
-aisp/utils/_multiclass.py,sha256=nWd58ayVfxgdopBQc9b_xywkolJ2fGW3AN-JoD2A9Fw,1134
-aisp/utils/distance.py,sha256=pIt76OUiwCry6eNEuWLYvUiW4KkeU6egjjnnmroFet8,6556
-aisp/utils/metrics.py,sha256=zDAScDbHRnfu24alRcZ6fEIUaWNoCD-QCtOCFBWPPo8,1277
-aisp/utils/sanitizers.py,sha256=u1GizdJ-RKfPWJLnuFiM09lpItZMhDR_EvK8YdVHwDk,1858
-aisp/utils/types.py,sha256=KELzr1kSBT7hHdsABoIS1xmEBGj6gRSH5A5YNG36I_c,1324
-aisp/utils/validation.py,sha256=ya7Y_6Lv7L6LAHC11EAfZRqqneCsOqrjG8i2EQFZcpA,1418
-aisp-0.2.1.dist-info/licenses/LICENSE,sha256=fTqV5eBpeAZO0_jit8j4Ref9ikBSlHJ8xwj5TLg7gFk,7817
-aisp-0.2.1.dist-info/METADATA,sha256=bhd0eOBVOuNN8gZ-jN3L4QWV17Qa0R2Kb-syKX5PK1U,4844
-aisp-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-aisp-0.2.1.dist-info/top_level.txt,sha256=Q5aJi_rAVT5UNS1As0ZafoyS5dwNibnoyOYV7RWUB9s,5
-aisp-0.2.1.dist-info/RECORD,,
{aisp-0.2.1.dist-info → aisp-0.3.0.dist-info}/WHEEL
File without changes
{aisp-0.2.1.dist-info → aisp-0.3.0.dist-info}/licenses/LICENSE
File without changes
{aisp-0.2.1.dist-info → aisp-0.3.0.dist-info}/top_level.txt
File without changes