PyPI - aisp - Versions diffs - 0.1.34__py3-none-any.whl → 0.1.40__py3-none-any.whl - Mend

aisp 0.1.34__py3-none-any.whl → 0.1.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

aisp/__init__.py +4 -0
aisp/base/__init__.py +4 -0
aisp/base/_classifier.py +90 -0
aisp/exceptions.py +42 -0
aisp/nsa/__init__.py +11 -0
aisp/nsa/_base.py +118 -0
aisp/nsa/_negative_selection.py +682 -0
aisp/nsa/_ns_core.py +153 -0
aisp/utils/__init__.py +2 -1
aisp/utils/_multiclass.py +16 -30
aisp/utils/distance.py +215 -0
aisp/utils/metrics.py +22 -43
aisp/utils/sanitizers.py +55 -0
{aisp-0.1.34.dist-info → aisp-0.1.40.dist-info}/METADATA +11 -111
aisp-0.1.40.dist-info/RECORD +18 -0
{aisp-0.1.34.dist-info → aisp-0.1.40.dist-info}/WHEEL +1 -1
aisp/NSA/__init__.py +0 -18
aisp/NSA/_base.py +0 -281
aisp/NSA/_negative_selection.py +0 -1115
aisp-0.1.34.dist-info/RECORD +0 -11
{aisp-0.1.34.dist-info → aisp-0.1.40.dist-info}/licenses/LICENSE +0 -0
{aisp-0.1.34.dist-info → aisp-0.1.40.dist-info}/top_level.txt +0 -0

aisp/nsa/_ns_core.py ADDED Viewed

@@ -0,0 +1,153 @@
+"""ns: Negative Selection
+The functions perform detector checks and utilize Numba decorators for Just-In-Time compilation
+"""
+import numpy.typing as npt
+from numba import njit, types
+from ..utils.distance import compute_metric_distance, hamming
+@njit(
+    [(
+        types.boolean[:, :],
+        types.boolean[:],
+        types.float64
+    )],
+    cache=True
+)
+def check_detector_bnsa_validity(
+    x_class: npt.NDArray,
+    vector_x: npt.NDArray,
+    aff_thresh: float
+) -> bool:
+    """
+    Checks the validity of a candidate detector (vector_x) against samples from a class (x_class)
+    using the Hamming distance. A detector is considered INVALID if its distance to any sample
+    in ``x_class`` is less than or equal to ``aff_thresh``.
+    Parameters
+    ----------
+    * x_class (``npt.NDArray``): Array containing the class samples. Expected shape:
+        (n_samples, n_features).
+    * vector_x (``npt.NDArray``): Array representing the detector. Expected shape: (n_features,).
+    * aff_thresh (``float``): Affinity threshold.
+    Returns
+    ----------
+    * True if the detector is valid, False otherwise.
+    """
+    n = x_class.shape[1]
+    if n != vector_x.shape[0]:
+        return False
+    for i in range(x_class.shape[0]):
+        # Calculate the normalized Hamming Distance
+        if hamming(x_class[i], vector_x) <= aff_thresh:
+            return False
+    return True
+@njit(
+    [(
+        types.boolean[:],
+        types.boolean[:, :, :],
+        types.float64
+    )],
+    cache=True
+)
+def bnsa_class_prediction(
+    features: npt.NDArray,
+    class_detectors: npt.NDArray,
+    aff_thresh: float
+) -> int:
+    """
+    Defines the class of a sample from the non-self detectors.
+    Parameters
+    ----------
+    * features (``npt.NDArray``): binary sample to be classified (shape: [n_features]).
+    * class_detectors (``npt.NDArray``): Array containing the detectors of all classes
+    (shape: [n_classes, n_detectors, n_features]).
+    * aff_thresh (``float``): Affinity threshold that determines whether a detector recognizes the
+    sample as non-self.
+    Returns
+    ----------
+    * int: Index of the predicted class. Returns -1 if it is non-self for all classes.
+    """
+    n_classes, n_detectors, _ = class_detectors.shape
+    best_class_idx = -1
+    best_avg_distance = 0
+    for class_index in range(n_classes):
+        total_distance = 0.0
+        class_found = True
+        # Calculates the Hamming distance between the row and all detectors.
+        for detector_index in range(n_detectors):
+            # Calculates the normalized Hamming distance between the sample and the detector
+            distance = hamming(features, class_detectors[class_index][detector_index])
+            # If the distance is less than or equal to the threshold, the detector recognizes
+            # the sample as non-self.
+            if distance <= aff_thresh:
+                class_found = False
+                break
+            total_distance += distance
+        # if the sample is self for the class
+        if class_found:
+            avg_distance = total_distance / n_detectors
+            # Choose the class with the largest average distance.
+            if avg_distance > best_avg_distance:
+                best_avg_distance = avg_distance
+                best_class_idx = class_index
+    return best_class_idx
+@njit(
+    [(
+        types.float64[:, :], types.float64[:],
+        types.float64, types.int32, types.float64
+    )],
+    cache=True
+)
+def check_detector_rnsa_validity(
+    x_class: npt.NDArray,
+    vector_x: npt.NDArray,
+    threshold: float,
+    metric: int,
+    p: float
+) -> bool:
+    """
+    Checks the validity of a candidate detector (vector_x) against samples from a class (x_class)
+    using the distance metric selected by ``metric``. A detector is considered INVALID if its
+    distance to any sample in ``x_class`` is less than or equal to ``threshold``.
+    Parameters
+    ----------
+    * x_class (``npt.NDArray``): Array containing the class samples. Expected shape:
+        (n_samples, n_features).
+    * vector_x (``npt.NDArray``): Array representing the detector. Expected shape: (n_features,).
+    * threshold (``float``): Distance at or below which the detector is rejected as invalid.
+    * metric (``int``): Distance metric to be used. Available options:
+        [0 (Euclidean), 1 (Manhattan), 2 (Minkowski)].
+    * p (``float``): Parameter for the Minkowski distance (used only if `metric`
+    is 2, i.e. Minkowski).
+    Returns
+    ----------
+    * True if the detector is valid, False otherwise.
+    """
+    n = x_class.shape[1]
+    if n != vector_x.shape[0]:
+        return False
+    for i in range(x_class.shape[0]):
+        distance = compute_metric_distance(vector_x, x_class[i], metric, p)
+        if distance <= threshold:
+            return False
+    return True

aisp/utils/__init__.py CHANGED Viewed

@@ -1,5 +1,6 @@
+"""Utility functions and helpers for development."""
 from ._multiclass import slice_index_list_by_class
 __author__ = "João Paulo da Silva Barros"
 __all__ = ["slice_index_list_by_class"]
-__version__ = "0.1.33"
+__version__ = "0.1.35"

aisp/utils/_multiclass.py CHANGED Viewed

@@ -1,3 +1,5 @@
+"""Utility functions for handling classes with multiple categories."""
 from typing import Union
 import numpy as np
 import numpy.typing as npt
@@ -5,37 +7,21 @@ import numpy.typing as npt
 def slice_index_list_by_class(classes: Union[npt.NDArray, list], y: npt.NDArray) -> dict:
     """
-    The function ``__slice_index_list_by_class(...)``, separates the indices of the lines \
-    according to the output class, to loop through the sample array, only in positions where \
-    the output is the class being trained.
-    Parameters:
-    ---
-        * classes (``list or npt.NDArray``): list with unique classes.
-        * y (npt.NDArray): Receives a ``y``[``N sample``] array with the output classes of the \
-            ``X`` sample array.
-    returns:
-    ---
-        * dict: A dictionary with the list of array positions(``y``), with the classes as key.
-    ---
-    A função ``__slice_index_list_by_class(...)``, separa os índices das linhas conforme a \
-    classe de saída, para percorrer o array de amostra, apenas nas posições que a saída for \
-    a classe que está sendo treinada.
-    Parameters:
-    ---
-        * classes (``list or npt.NDArray``): lista com classes únicas.
-        * y (npt.NDArray): Recebe um array ``y``[``N amostra``] com as classes de saída do \
-            array de amostra ``X``.
-    Returns:
-    ---
-        * dict: Um dicionário com a lista de posições do array(``y``), com as classes como chave.
+    Separates the row indices of ``y`` by output class, so that the sample array can be
+    traversed only at the positions whose output is the class being trained.
+    The indices are returned grouped per class.
+    Parameters
+    ----------
+    * classes (``list or npt.NDArray``): list with unique classes.
+    * y (``npt.NDArray``): Receives a ``y``[``N sample``] array with the output classes of the
+        ``X`` sample array.
+    returns
+    ----------
+    * dict: A dictionary with the list of array positions(``y``), with the classes as key.
     """
-    position_samples = dict()
+    position_samples = {}
     for _class_ in classes:
         # Gets the sample positions by class from y.
         position_samples[_class_] = list(np.nonzero(y == _class_)[0])

aisp/utils/distance.py ADDED Viewed

@@ -0,0 +1,215 @@
+"""Utility functions for normalized distance between arrays with numba decorators."""
+import numpy as np
+import numpy.typing as npt
+from numba import njit, types
+EUCLIDEAN = 0
+MANHATTAN = 1
+MINKOWSKI = 2
+HAMMING = 3
+@njit([(types.boolean[:], types.boolean[:])], cache=True)
+def hamming(u: npt.NDArray[np.bool_], v: npt.NDArray[np.bool_]) -> np.float64:
+    """
+    Function to calculate the normalized Hamming distance between two points.
+    ((u₁ ≠ v₁) + (u₂ ≠ v₂) + ... + (uₙ ≠ vₙ)) / n
+    Parameters
+    ----------
+    * u (``npt.NDArray``): Coordinates of the first point.
+    * v (``npt.NDArray``): Coordinates of the second point.
+    returns
+    ----------
+    * Distance (``float``) between the two points.
+    """
+    n = len(u)
+    if n == 0:
+        return 0.0
+    return np.sum(u != v) / n
+@njit()
+def euclidean(u: npt.NDArray[np.float64], v: npt.NDArray[np.float64]) -> np.float64:
+    """
+    Function to calculate the Euclidean distance between two points.
+    √((u₁ – v₁)² + (u₂ – v₂)² + ... + (uₙ – vₙ)²)
+    Parameters
+    ----------
+    * u (``npt.NDArray``): Coordinates of the first point.
+    * v (``npt.NDArray``): Coordinates of the second point.
+    returns
+    ----------
+    * Distance (``float``) between the two points.
+    """
+    return np.linalg.norm(u - v)
+@njit()
+def cityblock(u: npt.NDArray[np.float64], v: npt.NDArray[np.float64]) -> np.float64:
+    """
+    Function to calculate the normalized Manhattan distance between two points.
+    (|u₁ – v₁| + |u₂ – v₂| + ... + |uₙ – vₙ|) / n
+    Parameters
+    ----------
+    * u (``npt.NDArray``): Coordinates of the first point.
+    * v (``npt.NDArray``): Coordinates of the second point.
+    returns
+    ----------
+    * Distance (``float``) between the two points.
+    """
+    n = len(u)
+    if n == 0:
+        return -1.0
+    return np.sum(np.abs(u - v)) / n
+@njit()
+def minkowski(u: npt.NDArray[np.float64], v: npt.NDArray[np.float64], p: float = 2.0):
+    """
+    Function to calculate the normalized Minkowski distance between two points.
+    ((|u₁ – v₁|^p + |u₂ – v₂|^p + ... + |uₙ – vₙ|^p)^(1/p)) / n
+    Parameters
+    ----------
+    * u (``npt.NDArray``): Coordinates of the first point.
+    * v (``npt.NDArray``): Coordinates of the second point.
+    * p (``float``): The p parameter defines the type of distance to be calculated:
+        - p = 1: **Manhattan** distance — sum of absolute differences.
+        - p = 2: **Euclidean** distance — square root of the sum of squared differences.
+        - p > 2: **Minkowski** distance with an increasing penalty as p increases.
+    returns
+    ----------
+    * Distance (``float``) between the two points.
+    """
+    n = len(u)
+    if n == 0:
+        return -1.0
+    return (np.sum(np.abs(u - v) ** p) ** (1 / p)) / n
+@njit(
+    [(
+        types.float64[:], types.float64[:],
+        types.int32, types.float64
+    )],
+    cache=True
+)
+def compute_metric_distance(
+    u: npt.NDArray[np.float64],
+    v: npt.NDArray[np.float64],
+    metric: int,
+    p: np.float64 = 2.0
+) -> np.float64:
+    """
+    Function to calculate the distance between two points by the chosen ``metric``.
+    Parameters
+    ----------
+    * u (``npt.NDArray``): Coordinates of the first point.
+    * v (``npt.NDArray``): Coordinates of the second point.
+    * metric (``int``): Distance metric to be used. Available options:
+    [0 (Euclidean), 1 (Manhattan), 2 (Minkowski)]
+    * p (``float``): Parameter for the Minkowski distance (used only if `metric`
+    is 2, i.e. Minkowski).
+    returns
+    ----------
+    * Distance (``double``) between the two points with the selected metric.
+    """
+    if metric == MANHATTAN:
+        return cityblock(u, v)
+    if metric == MINKOWSKI:
+        return minkowski(u, v, p)
+    return euclidean(u, v)
+@njit(
+    [(
+        types.float64[:, :], types.float64[:],
+        types.int32, types.float64
+    )],
+    cache=True
+)
+def min_distance_to_class_vectors(
+    x_class: npt.NDArray[np.float64],
+    vector_x: npt.NDArray[np.float64],
+    metric: int,
+    p: float = 2.0
+) -> float:
+    """
+    Calculates the minimum distance between an input vector and the vectors of a class.
+    Parameters
+    ----------
+    * x_class (``npt.NDArray``): Array containing the class vectors to be compared
+    with the input vector. Expected shape: (n_samples, n_features).
+    * vector_x (``npt.NDArray``): Vector to be compared with the class vectors.
+    Expected shape: (n_features,).
+    * metric (``int``): Distance metric to be used. Available options:
+    [0 (Euclidean), 1 (Manhattan), 2 (Minkowski)]
+    * p (``float``): Parameter for the Minkowski distance (used only if `metric`
+    is 2, i.e. Minkowski).
+    Returns
+    ----------
+    * float: The minimum distance calculated between the input vector and the class vectors.
+    * Returns -1.0 if the input dimensions are incompatible.
+    """
+    n = x_class.shape[1]
+    if n != vector_x.shape[0]:
+        return -1.0
+    min_distance = np.inf
+    for i in range(x_class.shape[0]):
+        distance = compute_metric_distance(vector_x, x_class[i], metric, p)
+        min_distance = min(min_distance, distance)
+    return min_distance
+def get_metric_code(metric: str) -> int:
+    """
+    Returns the numeric code associated with a distance metric.
+    Parameters
+    ----------
+    * metric (str): Name of the metric. Can be "euclidean", "manhattan", "minkowski" or "hamming".
+    Raises
+    ----------
+    * ValueError: If the metric provided is not supported.
+    Returns
+    ----------
+    * int: Numeric code corresponding to the metric.
+    """
+    metric_map = {
+        "euclidean": EUCLIDEAN,
+        "manhattan": MANHATTAN,
+        "minkowski": MINKOWSKI,
+        "hamming": HAMMING
+    }
+    normalized_metric = metric.strip().lower()
+    if normalized_metric not in metric_map:
+        supported = "', '".join(metric_map.keys())
+        raise ValueError(f"Unknown metric: '{metric}'. Supported: {supported}")
+    return metric_map[normalized_metric]

aisp/utils/metrics.py CHANGED Viewed

@@ -1,61 +1,40 @@
+"""Utility functions for measuring accuracy and performance."""
 from typing import Union
 import numpy as np
 import numpy.typing as npt
 def accuracy_score(
-        y_true: Union[npt.NDArray, list],
-        y_pred: Union[npt.NDArray, list]
+    y_true: Union[npt.NDArray, list],
+    y_pred: Union[npt.NDArray, list]
 ) -> float:
     """
-    Function to calculate precision accuracy based on lists of true labels and
-    predicted labels.
-    Parameters:
-    ---
-        * y_true (``Union[npt.NDArray, list]``): Ground truth (correct) labels. \
-            Expected to be of the same length as `y_pred`.
-        * y_pred (``Union[npt.NDArray, list]``): Predicted labels. Expected to \
-            be of the same length as `y_true`.
-    Returns:
-    ---
-        * Accuracy (``float``): The ratio of correct predictions to the total \
-        number of predictions.
-    Raises:
-    ---
-        * ValueError: If `y_true` or `y_pred` are empty or if they do not have the same length.
-    ---
-    Função para calcular a acurácia de precisão com base em listas de rótulos
-    verdadeiros e nos rótulos previstos.
-    Parâmetros:
-    ---
-        * y_true (``Union[npt.NDArray, list]``): Rótulos verdadeiros (corretos)..
-        * y_pred (``Union[npt.NDArray, list]``): Rótulos previstos.
-    Retornos:
-    ---
-        * Precisão (``float``): A proporção de previsões corretas em relação
-        ao número total de previsões.
-    Lança:
-    ---
-        * ValueError: Se `y_true` ou `y_pred` estiverem vazios ou se não
-        tiverem o mesmo tamanho.
+    Function to calculate the accuracy score based on true and predicted labels.
+    Parameters
+    ----------
+    * y_true (``Union[npt.NDArray, list]``):
+        Ground truth (correct) labels. Expected to be of the same length as `y_pred`.
+    * y_pred (``Union[npt.NDArray, list]``):
+        Predicted labels. Expected to be of the same length as `y_true`.
+    Returns
+    ----------
+    * float: The ratio of correct predictions to the total number of predictions.
+    Raises
+    ----------
+    * ValueError: If `y_true` or `y_pred` are empty or if they do not have the same length.
     """
     n = len(y_true)
     if n == 0:
         raise ValueError(
             "Division by zero: y_true cannot be an empty list or array."
         )
-    elif n != len(y_pred):
+    if n != len(y_pred):
         raise ValueError(
             f"Error: The arrays must have the same size. Size of y_true: "
             f"{len(y_true)}, Size of y_pred: {len(y_pred)}"
         )
-    return np.sum(np.sum(np.array(y_true) == np.array(y_pred))) / n
+    return np.sum(np.array(y_true) == np.array(y_pred)) / n

aisp/utils/sanitizers.py ADDED Viewed

@@ -0,0 +1,55 @@
+"""Utility functions for validation and treatment of parameters."""
+from typing import TypeVar, Iterable, Callable, Any, Optional
+T = TypeVar('T')
+def sanitize_choice(value: T, valid_choices: Iterable[T], default: T) -> T:
+    """
+    Returns the value if it is present in the set of valid choices; otherwise,
+    returns the default value.
+    Parameters
+    ----------
+    * value (``T``): The value to be checked.
+    * valid_choices (``Iterable[T]``): A collection of valid choices.
+    * default (``T``): The default value to be returned if 'value' is not in 'valid_choices'.
+    Returns
+    ----------
+    * The original value if valid, or the default value if not.
+    """
+    return value if value in valid_choices else default
+def sanitize_param(value: T, default: T, condition: Callable[[T], bool]) -> T:
+    """
+    Returns the value if it satisfies the specified condition; otherwise, returns the default value.
+    Parameters
+    ----------
+    * value (``T``): The value to be checked.
+    * default (``T``): The default value to be returned if the condition is not satisfied.
+    * condition (``Callable[[T], bool]``): A function that takes a value and returns a boolean,
+        determining if the value is valid.
+    Returns
+    ----------
+    * T: The original value if the condition is satisfied, or the default value if not.
+    """
+    return value if condition(value) else default
+def sanitize_seed(seed: Any) -> Optional[int]:
+    """
+    Returns the seed if it is a non-negative integer; otherwise, returns None.
+    Parameters
+    ----------
+    * seed (``Any``): The seed value to be validated.
+    Returns
+    ----------
+    * Optional[int]: The original seed if it is a non-negative integer, or None if it is invalid.
+    """
+    return seed if isinstance(seed, int) and seed >= 0 else None

aisp 0.1.34__py3-none-any.whl → 0.1.40__py3-none-any.whl

aisp 0.1.34__py3-none-any.whl → 0.1.40__py3-none-any.whl