PyPI - deskit - Versions diffs - 1.2.1__tar.gz → 1.2.3__tar.gz - Mend

deskit 1.2.1.tar.gz → 1.2.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

{deskit-1.2.1/src/deskit.egg-info → deskit-1.2.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deskit
-Version: 1.2.1
+Version: 1.2.3
 Summary: A Python library for Dynamic Ensemble Selection
 Author: Tikhon Vodyanov
 License-Expression: MIT

{deskit-1.2.1 → deskit-1.2.3}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "deskit"
-version = "1.2.1"
+version = "1.2.3"
 description = "A Python library for Dynamic Ensemble Selection"
 readme = "README.md"
 license = "MIT"

deskit-1.2.3/src/deskit/base/knnbase.py ADDED Viewed

@@ -0,0 +1,135 @@
+from deskit.base.base import BaseRouter
+from deskit.base.predictbase import PredictBase
+import numpy as np
+class KNNBase(PredictBase, BaseRouter):
+    """
+    Base for KNN-based DES algorithms.
+    Inheriting PredictBase gives every subclass the public
+    predict() and predict_weights() API automatically.
+    Subclasses must implement _weights_batch().
+    """
+    def __init__(self, metric, mode='max', neighbor_finder=None, task='classification'):
+        """
+        Parameters
+        ----------
+        metric : callable
+            Per-sample scoring function: (y_true, y_pred) -> float.
+        mode : str
+            'max' if higher scores are better, 'min' if lower.
+        neighbor_finder : NeighborFinder
+            Backend used for neighborhood queries.
+        """
+        self.metric          = metric
+        self.mode            = mode
+        self.model           = neighbor_finder
+        self.matrix          = None   # (n_val, n_models); higher is always better
+        self.models          = None   # ordered list of model names
+        self.task = task
+    def _compute_scores(self, y, preds):
+        """
+        Return a 1D array of per-sample metric scores.
+        preds may be 1D (scalar predictions) or 2D (probability arrays, one
+        row per sample).
+        """
+        preds = np.asarray(preds)
+        if preds.ndim == 2:
+            return np.array([self.metric(y[i], preds[i]) for i in range(len(y))])
+        return np.vectorize(self.metric)(y, preds)
+    def fit(self, features, y, preds_dict):
+        """
+        Build the score matrix and fit the neighbor index.
+        This method expects pre-validated numpy arrays.
+        """
+        self.models = list(preds_dict.keys())
+        n_val       = len(y)
+        n_models    = len(self.models)
+        self.matrix = np.zeros((n_val, n_models))
+        for j, name in enumerate(self.models):
+            scores = self._compute_scores(y, preds_dict[name])
+            self.matrix[:, j] = scores if self.mode == 'max' else -scores
+        self.model.fit(features)
+    def _kneighbors(self, x, k=None, loo=False):
+        """
+        Query the fitted neighbor index, with optional leave-one-out (LOO)
+        exclusion of each query point's own occurrence in the DSEL.
+        Set loo=True when ``x`` is (part of) the same data this model was
+        fit on -- e.g. while tuning k / threshold / temperature directly on
+        the DSEL -- so a point doesn't end up neighboring itself at distance
+        0, which would otherwise dominate the routing.
+        Parameters
+        ----------
+        x : np.ndarray, shape (batch, n_features)
+        k : int, optional
+            Neighborhood size. None defers to the finder's own default.
+        loo : bool
+            If True, query one extra neighbor per row and drop the
+            zero-distance match (the point itself) when present, so the
+            returned neighborhood still has the size a normal call would
+            have produced. Rows with no zero-distance match (e.g. ``x``
+            isn't actually part of the fitted DSEL) fall back to dropping
+            the farthest neighbor instead, so shapes stay consistent.
+        Returns
+        -------
+        distances, indices : np.ndarray, each shape (batch, k_eff)
+        """
+        if not loo:
+            return self.model.kneighbors(x, k=k)
+        # Different backends store their default k under different
+        # attribute names (n_neighbors vs k), so rather than guessing,
+        # resolve the effective k with one cheap probe call when it isn't
+        # given explicitly. Costs one extra query only in that case.
+        if k is None:
+            probe_distances, _ = self.model.kneighbors(x, k=k)
+            k = probe_distances.shape[1]
+        distances, indices = self.model.kneighbors(x, k=k + 1)
+        return _drop_self_match(distances, indices, k)
+def _drop_self_match(distances, indices, k, eps=1e-6):
+    """
+    Drop one zero-distance neighbor per row from (batch, k+1) neighbor
+    results, returning (batch, k) arrays.
+    A query point's own occurrence in the DSEL, if present, is always the
+    closest possible neighbor (distance 0 is the global minimum for any
+    proper distance metric), so it's identified per row as whichever
+    column is nearest, gated on that distance being ~0. Rows without a
+    zero-distance match (point not actually in the DSEL) drop the
+    farthest neighbor instead, so every row keeps exactly k entries.
+    Note: if the DSEL contains true duplicate feature rows, only one
+    occurrence is dropped per query point -- the duplicates remain valid,
+    distinct neighbors. Distance order within each row need not be
+    sorted; this works either way.
+    """
+    batch = distances.shape[0]
+    rows = np.arange(batch)
+    nearest_col = np.argmin(distances, axis=1)
+    is_self_match = distances[rows, nearest_col] < eps
+    farthest_col = np.argmax(distances, axis=1)
+    drop_col = np.where(is_self_match, nearest_col, farthest_col)
+    keep = np.ones(distances.shape, dtype=bool)
+    keep[rows, drop_col] = False
+    new_distances = distances[keep].reshape(batch, k)
+    new_indices = indices[keep].reshape(batch, k)
+    return new_distances, new_indices

{deskit-1.2.1 → deskit-1.2.3}/src/deskit/base/predictbase.py RENAMED Viewed

@@ -42,6 +42,11 @@ class PredictBase:
         **kwargs
             Additional per-call overrides forwarded to _weights_batch.
             Supported by most algorithms: ``threshold=<float>``.
+            KNN-based algorithms also support ``loo=<bool>`` (default
+            False): set True when X_test is (part of) the same data the
+            model was fit on -- e.g. while tuning hyperparameters on the
+            DSEL -- so each point's own occurrence is excluded from its
+            neighborhood instead of trivially matching itself at distance 0.
         Returns
         -------
@@ -79,7 +84,9 @@ class PredictBase:
         temperature : float, optional
             Forwarded to _weights_batch.
         **kwargs
-            Forwarded to _weights_batch (e.g. ``threshold=<float>``).
+            Forwarded to _weights_batch (e.g. ``threshold=<float>``, or
+            ``loo=<bool>`` for leave-one-out neighbor exclusion when
+            X_test is drawn from the fit DSEL -- see predict_weights).
         Returns
         -------

{deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/dewsi.py RENAMED Viewed

@@ -38,6 +38,13 @@ class DEWSI(KNNBase):
     distance_metric : str
         Distance function to use for neighbor search. Default: 'euclidean'. See
         neighbors.list_distance_metrics() for all options and per-backend availability.
+    Notes
+    -----
+    predict() and predict_weights() accept an optional ``loo=True`` keyword
+    for hyperparameter tuning directly on the DSEL this model was fit on:
+    it excludes each query point's own occurrence from its neighborhood so
+    it doesn't trivially neighbor itself at distance 0.
     """
     def __init__(self, task, metric='mae', mode='min', k=10,
@@ -69,17 +76,23 @@ class DEWSI(KNNBase):
         )
         super().fit(features, y, preds_dict)
-    def _weights_batch(self, x, temperature=None, threshold=None):
+    def _weights_batch(self, x, temperature=None, threshold=None, k=None, loo=False):
         """
         Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
         Returns (batch, n_models) weight array.
+        loo : bool
+            Leave-one-out. Set True when x is (part of) the DSEL this model
+            was fit on -- e.g. while tuning hyperparameters -- so each
+            point's own occurrence is excluded from its neighborhood
+            instead of trivially matching itself at distance 0.
         """
         t  = temperature if temperature is not None else (
              self._temperature if self._temperature is not None else
              (0.5 if self.mode == 'min' else 1.0))
         th = threshold if threshold is not None else self.threshold
-        distances, indices = self.model.kneighbors(x)               # both (batch, k)
+        distances, indices = self._kneighbors(x, k=k, loo=loo)           # both (batch, k)
         # Inverse-distance-weighted average of each model's scores over K neighbors
         inv_dist   = 1.0 / np.maximum(distances, 1e-8)              # (batch, k)

{deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/dewsiv.py RENAMED Viewed

@@ -84,7 +84,7 @@ class DEWSIV(KNNBase):
                 preds = np.asarray(preds_dict[name])
                 self._var_matrix[:, j] = np.vectorize(_signed_residual)(y, preds)
-    def _weights_batch(self, x, temperature=None, threshold=None):
+    def _weights_batch(self, x, temperature=None, threshold=None, k=None, loo=False):
         """
         Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
         Returns (batch, n_models) weight array.
@@ -94,7 +94,7 @@ class DEWSIV(KNNBase):
              (0.5 if self.mode == 'min' else 1.0))
         th = threshold if threshold is not None else self.threshold
-        distances, indices = self.model.kneighbors(x)                # both (batch, k)
+        distances, indices = self.model.kneighbors(x, k=k, loo=loo)                # both (batch, k)
         # Inverse-distance weights
         inv_dist   = 1.0 / np.maximum(distances, 1e-8)               # (batch, k)

{deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/dewst.py RENAMED Viewed

@@ -86,7 +86,7 @@ class DEWST(KNNBase):
         )
         super().fit(features, y, preds_dict)
-    def _weights_batch(self, x, temperature=None, threshold=None):
+    def _weights_batch(self, x, temperature=None, threshold=None, k=None, r2_threshold=None, loo=False):
         """
         Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
         Returns (batch, n_models) weight array.
@@ -94,9 +94,10 @@ class DEWST(KNNBase):
         t  = temperature if temperature is not None else (
              self._temperature if self._temperature is not None else
              (0.5 if self._real_mode == 'min' else 1.0))
-        th = threshold if threshold is not None else self.threshold
+        th    = threshold    if threshold    is not None else self.threshold
+        r2_th = r2_threshold if r2_threshold is not None else self.r2_threshold
-        distances, indices = self.model.kneighbors(x)               # (batch, k)
+        distances, indices = self.model.kneighbors(x, k=k, loo=loo)          # (batch, k)
         k = distances.shape[1]
         # Inverse-distance weights
@@ -157,7 +158,7 @@ class DEWST(KNNBase):
             trend_scores = intercept
         # Blend: trust trend where R² ≥ threshold, fall back otherwise
-        use_trend  = r2 >= self.r2_threshold
+        use_trend  = r2 >= r2_th
         avg_scores = np.where(use_trend, trend_scores, dewsi_scores)
         # Standard DEWS softmax

{deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/dewsu.py RENAMED Viewed

@@ -65,7 +65,7 @@ class DEWSU(KNNBase):
         )
         super().fit(features, y, preds_dict)
-    def _weights_batch(self, x, temperature=None, threshold=None):
+    def _weights_batch(self, x, temperature=None, threshold=None, k=None, loo=False):
         """
         Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
         Returns (batch, n_models) weight array.
@@ -75,7 +75,7 @@ class DEWSU(KNNBase):
              (0.5 if self.mode == 'min' else 1.0))
         th = threshold if threshold is not None else self.threshold
-        _, indices = self.model.kneighbors(x)                        # (batch, k)
+        _, indices = self.model.kneighbors(x, k=k, loo=loo)                        # (batch, k)
         # Average each model's scores over the K neighbors
         avg_scores = self.matrix[indices].mean(axis=1)               # (batch, n_models)

{deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/dewsv.py RENAMED Viewed

@@ -84,7 +84,7 @@ class DEWSV(KNNBase):
                 preds = np.asarray(preds_dict[name])
                 self._var_matrix[:, j] = np.vectorize(_signed_residual)(y, preds)
-    def _weights_batch(self, x, temperature=None, threshold=None):
+    def _weights_batch(self, x, temperature=None, threshold=None, k=None, loo=False):
         """
         Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
         Returns (batch, n_models) weight array.
@@ -94,7 +94,7 @@ class DEWSV(KNNBase):
              (0.5 if self.mode == 'min' else 1.0))
         th = threshold if threshold is not None else self.threshold
-        _, indices = self.model.kneighbors(x)                        # (batch, k)
+        _, indices = self.model.kneighbors(x, k=k, loo=loo)                        # (batch, k)
         # Uniform average of each model's scores over K neighbors
         neighbor_scores = self.matrix[indices]                        # (batch, k, n_models)

{deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/knorae.py RENAMED Viewed

@@ -55,7 +55,7 @@ class KNORAE(KNNBase):
         )
         super().fit(features, y, preds_dict)
-    def _weights_batch(self, x, temperature=None, threshold=None):
+    def _weights_batch(self, x, temperature=None, threshold=None, k=None, loo=False):
         """
         Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
         Returns (batch, n_models) weight array.
@@ -64,7 +64,7 @@ class KNORAE(KNNBase):
         th       = threshold if threshold is not None else self.threshold
         n_models = len(self.models)
-        _, indices      = self.model.kneighbors(x)
+        _, indices      = self.model.kneighbors(x, k=k, loo=loo)
         k               = indices.shape[1]
         neighbor_scores = self.matrix[indices]                        # (batch, k, n_models)

{deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/knoraiu.py RENAMED Viewed

@@ -56,7 +56,7 @@ class KNORAIU(KNNBase):
         )
         super().fit(features, y, preds_dict)
-    def _weights_batch(self, x, temperature=None, threshold=None):
+    def _weights_batch(self, x, temperature=None, threshold=None, k=None, loo=False):
         """
         Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
         Returns (batch, n_models) weight array.
@@ -64,7 +64,7 @@ class KNORAIU(KNNBase):
         """
         th = threshold if threshold is not None else self.threshold
-        distances, indices = self.model.kneighbors(x)                # both (batch, k)
+        distances, indices = self.model.kneighbors(x, k=k, loo=loo)                # both (batch, k)
         neighbor_scores    = self.matrix[indices]                     # (batch, k, n_models)
         # Normalize per neighbor: best model = 1.0, worst = 0.0

{deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/knorau.py RENAMED Viewed

@@ -56,7 +56,7 @@ class KNORAU(KNNBase):
         )
         super().fit(features, y, preds_dict)
-    def _weights_batch(self, x, temperature=None, threshold=None):
+    def _weights_batch(self, x, temperature=None, threshold=None, k=None, loo=False):
         """
         Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
         Returns (batch, n_models) weight array.
@@ -64,7 +64,7 @@ class KNORAU(KNNBase):
         """
         th = threshold if threshold is not None else self.threshold
-        _, indices      = self.model.kneighbors(x)
+        _, indices      = self.model.kneighbors(x, k=k, loo=loo)
         neighbor_scores = self.matrix[indices]                        # (batch, k, n_models)
         # Normalize per neighbor: best model = 1.0, worst = 0.0

{deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/lwsei.py RENAMED Viewed

@@ -74,7 +74,7 @@ class LWSEI(PredictBase):
         self._y_val = y
         self._finder.fit(features)
-    def _weights_batch(self, x, temperature=None, **kwargs):
+    def _weights_batch(self, x, temperature=None, k=None, loo=False, **kwargs):
         """
         Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
         Returns (batch, n_models) weight array.
@@ -87,7 +87,7 @@ class LWSEI(PredictBase):
         n_models   = len(self.models)
         uniform    = np.full(n_models, 1.0 / n_models)
-        distances, indices = self._finder.kneighbors(x)              # (batch, k)
+        distances, indices = self._finder.kneighbors(x, k=k, loo=loo)              # (batch, k)
         weights_out        = np.empty((batch_size, n_models))
         for b in range(batch_size):

{deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/lwseu.py RENAMED Viewed

@@ -74,7 +74,7 @@ class LWSEU(PredictBase):
         self._y_val = y
         self._finder.fit(features)
-    def _weights_batch(self, x, temperature=None, **kwargs):
+    def _weights_batch(self, x, temperature=None, k=None, loo=False, **kwargs):
         """
         Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
         Returns (batch, n_models) weight array.
@@ -87,7 +87,7 @@ class LWSEU(PredictBase):
         n_models   = len(self.models)
         uniform    = np.full(n_models, 1.0 / n_models)
-        distances, indices = self._finder.kneighbors(x)              # (batch, k)
+        distances, indices = self._finder.kneighbors(x, k=k, loo=loo)              # (batch, k)
         weights_out        = np.empty((batch_size, n_models))
         for b in range(batch_size):

{deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/ola.py RENAMED Viewed

@@ -54,7 +54,7 @@ class OLA(KNNBase):
         if mat_max > mat_min:
             self.matrix = (self.matrix - mat_min) / (mat_max - mat_min)
-    def _weights_batch(self, x, temperature=None, threshold=None):
+    def _weights_batch(self, x, temperature=None, threshold=None, k=None, loo=False):
         """
         Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
         Returns (batch, n_models) weight array.
@@ -63,7 +63,7 @@ class OLA(KNNBase):
         """
         batch_size = x.shape[0]
-        _, indices  = self.model.kneighbors(x)
+        _, indices  = self.model.kneighbors(x, k=k, loo=loo)
         avg_scores  = self.matrix[indices].mean(axis=1)               # (batch, n_models)
         best_indices = np.argmax(avg_scores, axis=1)

{deskit-1.2.1 → deskit-1.2.3}/src/deskit/neighbors.py RENAMED Viewed

@@ -89,11 +89,6 @@ _NMSLIB_METRIC_MAP = {
     'dot':        'negdotprod',
 }
-# Unified view for HNSWNeighborFinder validation: union of both backends.
-# We keep the old name for backwards compatibility.
-_HNSW_METRIC_MAP = _HNSWLIB_METRIC_MAP   # kept for any external references
-_HNSW_METRICS = _UNIVERSAL_METRICS  # partial — see fit() for fallback note
 # All metrics callable from the public API.
 ALL_METRICS = _KNN_METRICS | {'jensenshannon', 'dot'}

{deskit-1.2.1 → deskit-1.2.3/src/deskit.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deskit
-Version: 1.2.1
+Version: 1.2.3
 Summary: A Python library for Dynamic Ensemble Selection
 Author: Tikhon Vodyanov
 License-Expression: MIT

deskit-1.2.1/src/deskit/base/knnbase.py DELETED Viewed

@@ -1,60 +0,0 @@
-from deskit.base.base import BaseRouter
-from deskit.base.predictbase import PredictBase
-import numpy as np
-class KNNBase(PredictBase, BaseRouter):
-    """
-    Base for KNN-based DES algorithms.
-    Inheriting PredictBase gives every subclass the public
-    predict() and predict_weights() API automatically.
-    Subclasses must implement _weights_batch().
-    """
-    def __init__(self, metric, mode='max', neighbor_finder=None, task='classification'):
-        """
-        Parameters
-        ----------
-        metric : callable
-            Per-sample scoring function: (y_true, y_pred) -> float.
-        mode : str
-            'max' if higher scores are better, 'min' if lower.
-        neighbor_finder : NeighborFinder
-            Backend used for neighborhood queries.
-        """
-        self.metric          = metric
-        self.mode            = mode
-        self.model           = neighbor_finder
-        self.matrix          = None   # (n_val, n_models); higher is always better
-        self.models          = None   # ordered list of model names
-        self.task = task
-    def _compute_scores(self, y, preds):
-        """
-        Return a 1D array of per-sample metric scores.
-        preds may be 1D (scalar predictions) or 2D (probability arrays, one
-        row per sample).
-        """
-        preds = np.asarray(preds)
-        if preds.ndim == 2:
-            return np.array([self.metric(y[i], preds[i]) for i in range(len(y))])
-        return np.vectorize(self.metric)(y, preds)
-    def fit(self, features, y, preds_dict):
-        """
-        Build the score matrix and fit the neighbor index.
-        This method expects pre-validated numpy arrays.
-        """
-        self.models = list(preds_dict.keys())
-        n_val       = len(y)
-        n_models    = len(self.models)
-        self.matrix = np.zeros((n_val, n_models))
-        for j, name in enumerate(self.models):
-            scores = self._compute_scores(y, preds_dict[name])
-            self.matrix[:, j] = scores if self.mode == 'max' else -scores
-        self.model.fit(features)