deskit 1.2.1__tar.gz → 1.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deskit-1.2.1/src/deskit.egg-info → deskit-1.2.3}/PKG-INFO +1 -1
- {deskit-1.2.1 → deskit-1.2.3}/pyproject.toml +1 -1
- deskit-1.2.3/src/deskit/base/knnbase.py +135 -0
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/base/predictbase.py +8 -1
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/dewsi.py +15 -2
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/dewsiv.py +2 -2
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/dewst.py +5 -4
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/dewsu.py +2 -2
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/dewsv.py +2 -2
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/knorae.py +2 -2
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/knoraiu.py +2 -2
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/knorau.py +2 -2
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/lwsei.py +2 -2
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/lwseu.py +2 -2
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/ola.py +2 -2
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/neighbors.py +0 -5
- {deskit-1.2.1 → deskit-1.2.3/src/deskit.egg-info}/PKG-INFO +1 -1
- deskit-1.2.1/src/deskit/base/knnbase.py +0 -60
- {deskit-1.2.1 → deskit-1.2.3}/LICENSE +0 -0
- {deskit-1.2.1 → deskit-1.2.3}/README.md +0 -0
- {deskit-1.2.1 → deskit-1.2.3}/setup.cfg +0 -0
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/__init__.py +0 -0
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/_config.py +0 -0
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/base/__init__.py +0 -0
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/base/base.py +0 -0
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/des/__init__.py +0 -0
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/metrics.py +0 -0
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/router.py +0 -0
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit/utils.py +0 -0
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit.egg-info/SOURCES.txt +0 -0
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit.egg-info/dependency_links.txt +0 -0
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit.egg-info/requires.txt +0 -0
- {deskit-1.2.1 → deskit-1.2.3}/src/deskit.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
from deskit.base.base import BaseRouter
|
|
2
|
+
from deskit.base.predictbase import PredictBase
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class KNNBase(PredictBase, BaseRouter):
    """
    Base for KNN-based DES algorithms.

    Inheriting PredictBase gives every subclass the public
    predict() and predict_weights() API automatically.
    Subclasses must implement _weights_batch().
    """

    def __init__(self, metric, mode='max', neighbor_finder=None, task='classification'):
        """
        Parameters
        ----------
        metric : callable
            Per-sample scoring function: (y_true, y_pred) -> float.
        mode : str
            'max' if higher scores are better, 'min' if lower. Any value
            other than 'max' is treated as 'min' when the score matrix is
            built in fit().
        neighbor_finder : NeighborFinder
            Backend used for neighborhood queries.
        task : str
            Stored on the instance unchanged; not interpreted by this class.
        """
        self.metric = metric
        self.mode = mode
        self.model = neighbor_finder
        self.matrix = None  # (n_val, n_models); higher is always better
        self.models = None  # ordered list of model names
        self.task = task

    def _compute_scores(self, y, preds):
        """
        Return a 1D float array of per-sample metric scores.

        preds may be 1D (scalar predictions) or 2D (probability arrays, one
        row per sample).

        The result is always float so that metrics returning bool or int
        (e.g. a 0/1 error indicator) can be negated safely in fit(); NumPy
        refuses unary minus on boolean arrays.
        """
        preds = np.asarray(preds)
        if preds.ndim == 2:
            # Index by position over len(y) so a preds array that is too
            # short raises instead of being silently truncated.
            return np.array(
                [self.metric(y[i], preds[i]) for i in range(len(y))],
                dtype=float,
            )
        # otypes pins the output dtype and also lets vectorize accept
        # size-0 inputs, which it rejects when the dtype must be inferred.
        return np.vectorize(self.metric, otypes=[float])(y, preds)

    def fit(self, features, y, preds_dict):
        """
        Build the score matrix and fit the neighbor index.

        This method expects pre-validated numpy arrays.

        Parameters
        ----------
        features : array-like
            Passed unchanged to the neighbor finder's fit().
        y : array-like
            Ground-truth targets, one per validation sample.
        preds_dict : dict
            Maps model name -> that model's predictions for ``y``. Key
            order defines the column order of ``self.matrix``.
        """
        self.models = list(preds_dict.keys())
        n_val = len(y)
        n_models = len(self.models)
        self.matrix = np.zeros((n_val, n_models))

        for j, name in enumerate(self.models):
            # Coerce to float here as well, in case a subclass overrides
            # _compute_scores and returns a bool/int array.
            scores = np.asarray(
                self._compute_scores(y, preds_dict[name]), dtype=float
            )
            # Stored orientation is "higher is better", so lower-is-better
            # metrics are negated.
            self.matrix[:, j] = scores if self.mode == 'max' else -scores

        self.model.fit(features)

    def _kneighbors(self, x, k=None, loo=False):
        """
        Query the fitted neighbor index, with optional leave-one-out (LOO)
        exclusion of each query point's own occurrence in the DSEL.

        Set loo=True when ``x`` is (part of) the same data this model was
        fit on -- e.g. while tuning k / threshold / temperature directly on
        the DSEL -- so a point doesn't end up neighboring itself at distance
        0, which would otherwise dominate the routing.

        Parameters
        ----------
        x : np.ndarray, shape (batch, n_features)
        k : int, optional
            Neighborhood size. None defers to the finder's own default.
        loo : bool
            If True, query one extra neighbor per row and drop the
            zero-distance match (the point itself) when present, so the
            returned neighborhood still has the size a normal call would
            have produced. Rows with no zero-distance match (e.g. ``x``
            isn't actually part of the fitted DSEL) fall back to dropping
            the farthest neighbor instead, so shapes stay consistent.

        Returns
        -------
        distances, indices : np.ndarray, each shape (batch, k_eff)
        """
        if not loo:
            return self.model.kneighbors(x, k=k)

        # Different backends store their default k under different
        # attribute names (n_neighbors vs k), so rather than guessing,
        # resolve the effective k with one cheap probe call when it isn't
        # given explicitly. Costs one extra query only in that case.
        if k is None:
            probe_distances, _ = self.model.kneighbors(x, k=k)
            k = probe_distances.shape[1]

        # Ask for one extra neighbor so that k remain after the drop.
        distances, indices = self.model.kneighbors(x, k=k + 1)
        return _drop_self_match(distances, indices, k)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _drop_self_match(distances, indices, k, eps=1e-6):
    """
    Drop one neighbor per row from (batch, k+1) neighbor results,
    returning arrays with one column fewer.

    A query point's own occurrence in the DSEL, if present, is always the
    closest possible neighbor (distance 0 is the global minimum for any
    proper distance metric), so it's identified per row as whichever
    column is nearest, gated on that distance being below ``eps``. Rows
    without such a match (point not actually in the DSEL) drop the
    farthest neighbor instead, so every row keeps the same number of
    entries.

    Note: if the DSEL contains true duplicate feature rows, only one
    occurrence is dropped per query point -- the duplicates remain valid,
    distinct neighbors. Distance order within each row need not be
    sorted; the surviving columns keep their original relative order.

    Parameters
    ----------
    distances : np.ndarray, shape (batch, n_cols)
        Neighbor distances, normally with n_cols == k + 1.
    indices : np.ndarray, shape (batch, n_cols)
        Neighbor indices aligned with ``distances``.
    k : int
        Nominal neighborhood size requested by the caller. Kept for
        interface compatibility; the output width is taken from the
        input arrays (n_cols - 1) so that a result with fewer than
        k + 1 columns does not break the reshape.
    eps : float
        Distances strictly below this value count as a self match.

    Returns
    -------
    new_distances, new_indices : np.ndarray, each shape (batch, n_cols - 1)
        If the input has zero columns it is returned unchanged.
    """
    distances = np.asarray(distances)
    indices = np.asarray(indices)

    batch, n_cols = distances.shape
    if n_cols == 0:
        # Nothing to drop; argmin/argmax would raise on an empty axis.
        return distances, indices
    n_keep = n_cols - 1

    rows = np.arange(batch)

    nearest_col = np.argmin(distances, axis=1)
    is_self_match = distances[rows, nearest_col] < eps

    farthest_col = np.argmax(distances, axis=1)
    drop_col = np.where(is_self_match, nearest_col, farthest_col)

    # Exactly one False per row, so the masked selection has
    # batch * n_keep elements and reshapes cleanly.
    keep = np.ones(distances.shape, dtype=bool)
    keep[rows, drop_col] = False

    new_distances = distances[keep].reshape(batch, n_keep)
    new_indices = indices[keep].reshape(batch, n_keep)
    return new_distances, new_indices
|
|
@@ -42,6 +42,11 @@ class PredictBase:
|
|
|
42
42
|
**kwargs
|
|
43
43
|
Additional per-call overrides forwarded to _weights_batch.
|
|
44
44
|
Supported by most algorithms: ``threshold=<float>``.
|
|
45
|
+
KNN-based algorithms also support ``loo=<bool>`` (default
|
|
46
|
+
False): set True when X_test is (part of) the same data the
|
|
47
|
+
model was fit on -- e.g. while tuning hyperparameters on the
|
|
48
|
+
DSEL -- so each point's own occurrence is excluded from its
|
|
49
|
+
neighborhood instead of trivially matching itself at distance 0.
|
|
45
50
|
|
|
46
51
|
Returns
|
|
47
52
|
-------
|
|
@@ -79,7 +84,9 @@ class PredictBase:
|
|
|
79
84
|
temperature : float, optional
|
|
80
85
|
Forwarded to _weights_batch.
|
|
81
86
|
**kwargs
|
|
82
|
-
Forwarded to _weights_batch (e.g. ``threshold=<float
|
|
87
|
+
Forwarded to _weights_batch (e.g. ``threshold=<float>``, or
|
|
88
|
+
``loo=<bool>`` for leave-one-out neighbor exclusion when
|
|
89
|
+
X_test is drawn from the fit DSEL -- see predict_weights).
|
|
83
90
|
|
|
84
91
|
Returns
|
|
85
92
|
-------
|
|
@@ -38,6 +38,13 @@ class DEWSI(KNNBase):
|
|
|
38
38
|
distance_metric : str
|
|
39
39
|
Distance function to use for neighbor search. Default: 'euclidean'. See
|
|
40
40
|
neighbors.list_distance_metrics() for all options and per-backend availability.
|
|
41
|
+
|
|
42
|
+
Notes
|
|
43
|
+
-----
|
|
44
|
+
predict() and predict_weights() accept an optional ``loo=True`` keyword
|
|
45
|
+
for hyperparameter tuning directly on the DSEL this model was fit on:
|
|
46
|
+
it excludes each query point's own occurrence from its neighborhood so
|
|
47
|
+
it doesn't trivially neighbor itself at distance 0.
|
|
41
48
|
"""
|
|
42
49
|
|
|
43
50
|
def __init__(self, task, metric='mae', mode='min', k=10,
|
|
@@ -69,17 +76,23 @@ class DEWSI(KNNBase):
|
|
|
69
76
|
)
|
|
70
77
|
super().fit(features, y, preds_dict)
|
|
71
78
|
|
|
72
|
-
def _weights_batch(self, x, temperature=None, threshold=None):
|
|
79
|
+
def _weights_batch(self, x, temperature=None, threshold=None, k=None, loo=False):
|
|
73
80
|
"""
|
|
74
81
|
Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
|
|
75
82
|
Returns (batch, n_models) weight array.
|
|
83
|
+
|
|
84
|
+
loo : bool
|
|
85
|
+
Leave-one-out. Set True when x is (part of) the DSEL this model
|
|
86
|
+
was fit on -- e.g. while tuning hyperparameters -- so each
|
|
87
|
+
point's own occurrence is excluded from its neighborhood
|
|
88
|
+
instead of trivially matching itself at distance 0.
|
|
76
89
|
"""
|
|
77
90
|
t = temperature if temperature is not None else (
|
|
78
91
|
self._temperature if self._temperature is not None else
|
|
79
92
|
(0.5 if self.mode == 'min' else 1.0))
|
|
80
93
|
th = threshold if threshold is not None else self.threshold
|
|
81
94
|
|
|
82
|
-
distances, indices = self.
|
|
95
|
+
distances, indices = self._kneighbors(x, k=k, loo=loo) # both (batch, k)
|
|
83
96
|
|
|
84
97
|
# Inverse-distance-weighted average of each model's scores over K neighbors
|
|
85
98
|
inv_dist = 1.0 / np.maximum(distances, 1e-8) # (batch, k)
|
|
@@ -84,7 +84,7 @@ class DEWSIV(KNNBase):
|
|
|
84
84
|
preds = np.asarray(preds_dict[name])
|
|
85
85
|
self._var_matrix[:, j] = np.vectorize(_signed_residual)(y, preds)
|
|
86
86
|
|
|
87
|
-
def _weights_batch(self, x, temperature=None, threshold=None):
|
|
87
|
+
def _weights_batch(self, x, temperature=None, threshold=None, k=None, loo=False):
|
|
88
88
|
"""
|
|
89
89
|
Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
|
|
90
90
|
Returns (batch, n_models) weight array.
|
|
@@ -94,7 +94,7 @@ class DEWSIV(KNNBase):
|
|
|
94
94
|
(0.5 if self.mode == 'min' else 1.0))
|
|
95
95
|
th = threshold if threshold is not None else self.threshold
|
|
96
96
|
|
|
97
|
-
distances, indices = self.model.kneighbors(x) # both (batch, k)
|
|
97
|
+
distances, indices = self.model.kneighbors(x, k=k, loo=loo) # both (batch, k)
|
|
98
98
|
|
|
99
99
|
# Inverse-distance weights
|
|
100
100
|
inv_dist = 1.0 / np.maximum(distances, 1e-8) # (batch, k)
|
|
@@ -86,7 +86,7 @@ class DEWST(KNNBase):
|
|
|
86
86
|
)
|
|
87
87
|
super().fit(features, y, preds_dict)
|
|
88
88
|
|
|
89
|
-
def _weights_batch(self, x, temperature=None, threshold=None):
|
|
89
|
+
def _weights_batch(self, x, temperature=None, threshold=None, k=None, r2_threshold=None, loo=False):
|
|
90
90
|
"""
|
|
91
91
|
Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
|
|
92
92
|
Returns (batch, n_models) weight array.
|
|
@@ -94,9 +94,10 @@ class DEWST(KNNBase):
|
|
|
94
94
|
t = temperature if temperature is not None else (
|
|
95
95
|
self._temperature if self._temperature is not None else
|
|
96
96
|
(0.5 if self._real_mode == 'min' else 1.0))
|
|
97
|
-
th
|
|
97
|
+
th = threshold if threshold is not None else self.threshold
|
|
98
|
+
r2_th = r2_threshold if r2_threshold is not None else self.r2_threshold
|
|
98
99
|
|
|
99
|
-
distances, indices = self.model.kneighbors(x)
|
|
100
|
+
distances, indices = self.model.kneighbors(x, k=k, loo=loo) # (batch, k)
|
|
100
101
|
k = distances.shape[1]
|
|
101
102
|
|
|
102
103
|
# Inverse-distance weights
|
|
@@ -157,7 +158,7 @@ class DEWST(KNNBase):
|
|
|
157
158
|
trend_scores = intercept
|
|
158
159
|
|
|
159
160
|
# Blend: trust trend where R² ≥ threshold, fall back otherwise
|
|
160
|
-
use_trend = r2 >=
|
|
161
|
+
use_trend = r2 >= r2_th
|
|
161
162
|
avg_scores = np.where(use_trend, trend_scores, dewsi_scores)
|
|
162
163
|
|
|
163
164
|
# Standard DEWS softmax
|
|
@@ -65,7 +65,7 @@ class DEWSU(KNNBase):
|
|
|
65
65
|
)
|
|
66
66
|
super().fit(features, y, preds_dict)
|
|
67
67
|
|
|
68
|
-
def _weights_batch(self, x, temperature=None, threshold=None):
|
|
68
|
+
def _weights_batch(self, x, temperature=None, threshold=None, k=None, loo=False):
|
|
69
69
|
"""
|
|
70
70
|
Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
|
|
71
71
|
Returns (batch, n_models) weight array.
|
|
@@ -75,7 +75,7 @@ class DEWSU(KNNBase):
|
|
|
75
75
|
(0.5 if self.mode == 'min' else 1.0))
|
|
76
76
|
th = threshold if threshold is not None else self.threshold
|
|
77
77
|
|
|
78
|
-
_, indices = self.model.kneighbors(x) # (batch, k)
|
|
78
|
+
_, indices = self.model.kneighbors(x, k=k, loo=loo) # (batch, k)
|
|
79
79
|
|
|
80
80
|
# Average each model's scores over the K neighbors
|
|
81
81
|
avg_scores = self.matrix[indices].mean(axis=1) # (batch, n_models)
|
|
@@ -84,7 +84,7 @@ class DEWSV(KNNBase):
|
|
|
84
84
|
preds = np.asarray(preds_dict[name])
|
|
85
85
|
self._var_matrix[:, j] = np.vectorize(_signed_residual)(y, preds)
|
|
86
86
|
|
|
87
|
-
def _weights_batch(self, x, temperature=None, threshold=None):
|
|
87
|
+
def _weights_batch(self, x, temperature=None, threshold=None, k=None, loo=False):
|
|
88
88
|
"""
|
|
89
89
|
Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
|
|
90
90
|
Returns (batch, n_models) weight array.
|
|
@@ -94,7 +94,7 @@ class DEWSV(KNNBase):
|
|
|
94
94
|
(0.5 if self.mode == 'min' else 1.0))
|
|
95
95
|
th = threshold if threshold is not None else self.threshold
|
|
96
96
|
|
|
97
|
-
_, indices = self.model.kneighbors(x) # (batch, k)
|
|
97
|
+
_, indices = self.model.kneighbors(x, k=k, loo=loo) # (batch, k)
|
|
98
98
|
|
|
99
99
|
# Uniform average of each model's scores over K neighbors
|
|
100
100
|
neighbor_scores = self.matrix[indices] # (batch, k, n_models)
|
|
@@ -55,7 +55,7 @@ class KNORAE(KNNBase):
|
|
|
55
55
|
)
|
|
56
56
|
super().fit(features, y, preds_dict)
|
|
57
57
|
|
|
58
|
-
def _weights_batch(self, x, temperature=None, threshold=None):
|
|
58
|
+
def _weights_batch(self, x, temperature=None, threshold=None, k=None, loo=False):
|
|
59
59
|
"""
|
|
60
60
|
Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
|
|
61
61
|
Returns (batch, n_models) weight array.
|
|
@@ -64,7 +64,7 @@ class KNORAE(KNNBase):
|
|
|
64
64
|
th = threshold if threshold is not None else self.threshold
|
|
65
65
|
n_models = len(self.models)
|
|
66
66
|
|
|
67
|
-
_, indices = self.model.kneighbors(x)
|
|
67
|
+
_, indices = self.model.kneighbors(x, k=k, loo=loo)
|
|
68
68
|
k = indices.shape[1]
|
|
69
69
|
neighbor_scores = self.matrix[indices] # (batch, k, n_models)
|
|
70
70
|
|
|
@@ -56,7 +56,7 @@ class KNORAIU(KNNBase):
|
|
|
56
56
|
)
|
|
57
57
|
super().fit(features, y, preds_dict)
|
|
58
58
|
|
|
59
|
-
def _weights_batch(self, x, temperature=None, threshold=None):
|
|
59
|
+
def _weights_batch(self, x, temperature=None, threshold=None, k=None, loo=False):
|
|
60
60
|
"""
|
|
61
61
|
Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
|
|
62
62
|
Returns (batch, n_models) weight array.
|
|
@@ -64,7 +64,7 @@ class KNORAIU(KNNBase):
|
|
|
64
64
|
"""
|
|
65
65
|
th = threshold if threshold is not None else self.threshold
|
|
66
66
|
|
|
67
|
-
distances, indices = self.model.kneighbors(x) # both (batch, k)
|
|
67
|
+
distances, indices = self.model.kneighbors(x, k=k, loo=loo) # both (batch, k)
|
|
68
68
|
neighbor_scores = self.matrix[indices] # (batch, k, n_models)
|
|
69
69
|
|
|
70
70
|
# Normalize per neighbor: best model = 1.0, worst = 0.0
|
|
@@ -56,7 +56,7 @@ class KNORAU(KNNBase):
|
|
|
56
56
|
)
|
|
57
57
|
super().fit(features, y, preds_dict)
|
|
58
58
|
|
|
59
|
-
def _weights_batch(self, x, temperature=None, threshold=None):
|
|
59
|
+
def _weights_batch(self, x, temperature=None, threshold=None, k=None, loo=False):
|
|
60
60
|
"""
|
|
61
61
|
Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
|
|
62
62
|
Returns (batch, n_models) weight array.
|
|
@@ -64,7 +64,7 @@ class KNORAU(KNNBase):
|
|
|
64
64
|
"""
|
|
65
65
|
th = threshold if threshold is not None else self.threshold
|
|
66
66
|
|
|
67
|
-
_, indices = self.model.kneighbors(x)
|
|
67
|
+
_, indices = self.model.kneighbors(x, k=k, loo=loo)
|
|
68
68
|
neighbor_scores = self.matrix[indices] # (batch, k, n_models)
|
|
69
69
|
|
|
70
70
|
# Normalize per neighbor: best model = 1.0, worst = 0.0
|
|
@@ -74,7 +74,7 @@ class LWSEI(PredictBase):
|
|
|
74
74
|
self._y_val = y
|
|
75
75
|
self._finder.fit(features)
|
|
76
76
|
|
|
77
|
-
def _weights_batch(self, x, temperature=None, **kwargs):
|
|
77
|
+
def _weights_batch(self, x, temperature=None, k=None, loo=False, **kwargs):
|
|
78
78
|
"""
|
|
79
79
|
Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
|
|
80
80
|
Returns (batch, n_models) weight array.
|
|
@@ -87,7 +87,7 @@ class LWSEI(PredictBase):
|
|
|
87
87
|
n_models = len(self.models)
|
|
88
88
|
uniform = np.full(n_models, 1.0 / n_models)
|
|
89
89
|
|
|
90
|
-
distances, indices = self._finder.kneighbors(x) # (batch, k)
|
|
90
|
+
distances, indices = self._finder.kneighbors(x, k=k, loo=loo) # (batch, k)
|
|
91
91
|
weights_out = np.empty((batch_size, n_models))
|
|
92
92
|
|
|
93
93
|
for b in range(batch_size):
|
|
@@ -74,7 +74,7 @@ class LWSEU(PredictBase):
|
|
|
74
74
|
self._y_val = y
|
|
75
75
|
self._finder.fit(features)
|
|
76
76
|
|
|
77
|
-
def _weights_batch(self, x, temperature=None, **kwargs):
|
|
77
|
+
def _weights_batch(self, x, temperature=None, k=None, loo=False, **kwargs):
|
|
78
78
|
"""
|
|
79
79
|
Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
|
|
80
80
|
Returns (batch, n_models) weight array.
|
|
@@ -87,7 +87,7 @@ class LWSEU(PredictBase):
|
|
|
87
87
|
n_models = len(self.models)
|
|
88
88
|
uniform = np.full(n_models, 1.0 / n_models)
|
|
89
89
|
|
|
90
|
-
distances, indices = self._finder.kneighbors(x) # (batch, k)
|
|
90
|
+
distances, indices = self._finder.kneighbors(x, k=k, loo=loo) # (batch, k)
|
|
91
91
|
weights_out = np.empty((batch_size, n_models))
|
|
92
92
|
|
|
93
93
|
for b in range(batch_size):
|
|
@@ -54,7 +54,7 @@ class OLA(KNNBase):
|
|
|
54
54
|
if mat_max > mat_min:
|
|
55
55
|
self.matrix = (self.matrix - mat_min) / (mat_max - mat_min)
|
|
56
56
|
|
|
57
|
-
def _weights_batch(self, x, temperature=None, threshold=None):
|
|
57
|
+
def _weights_batch(self, x, temperature=None, threshold=None, k=None, loo=False):
|
|
58
58
|
"""
|
|
59
59
|
Core weight computation. x is a 2-D float64 numpy array (batch, n_features).
|
|
60
60
|
Returns (batch, n_models) weight array.
|
|
@@ -63,7 +63,7 @@ class OLA(KNNBase):
|
|
|
63
63
|
"""
|
|
64
64
|
batch_size = x.shape[0]
|
|
65
65
|
|
|
66
|
-
_, indices = self.model.kneighbors(x)
|
|
66
|
+
_, indices = self.model.kneighbors(x, k=k, loo=loo)
|
|
67
67
|
avg_scores = self.matrix[indices].mean(axis=1) # (batch, n_models)
|
|
68
68
|
best_indices = np.argmax(avg_scores, axis=1)
|
|
69
69
|
|
|
@@ -89,11 +89,6 @@ _NMSLIB_METRIC_MAP = {
|
|
|
89
89
|
'dot': 'negdotprod',
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
-
# Unified view for HNSWNeighborFinder validation: union of both backends.
|
|
93
|
-
# We keep the old name for backwards compatibility.
|
|
94
|
-
_HNSW_METRIC_MAP = _HNSWLIB_METRIC_MAP # kept for any external references
|
|
95
|
-
_HNSW_METRICS = _UNIVERSAL_METRICS # partial — see fit() for fallback note
|
|
96
|
-
|
|
97
92
|
# All metrics callable from the public API.
|
|
98
93
|
ALL_METRICS = _KNN_METRICS | {'jensenshannon', 'dot'}
|
|
99
94
|
|
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
from deskit.base.base import BaseRouter
|
|
2
|
-
from deskit.base.predictbase import PredictBase
|
|
3
|
-
import numpy as np
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class KNNBase(PredictBase, BaseRouter):
|
|
7
|
-
"""
|
|
8
|
-
Base for KNN-based DES algorithms.
|
|
9
|
-
|
|
10
|
-
Inheriting PredictBase gives every subclass the public
|
|
11
|
-
predict() and predict_weights() API automatically.
|
|
12
|
-
Subclasses must implement _weights_batch().
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
def __init__(self, metric, mode='max', neighbor_finder=None, task='classification'):
|
|
16
|
-
"""
|
|
17
|
-
Parameters
|
|
18
|
-
----------
|
|
19
|
-
metric : callable
|
|
20
|
-
Per-sample scoring function: (y_true, y_pred) -> float.
|
|
21
|
-
mode : str
|
|
22
|
-
'max' if higher scores are better, 'min' if lower.
|
|
23
|
-
neighbor_finder : NeighborFinder
|
|
24
|
-
Backend used for neighborhood queries.
|
|
25
|
-
"""
|
|
26
|
-
self.metric = metric
|
|
27
|
-
self.mode = mode
|
|
28
|
-
self.model = neighbor_finder
|
|
29
|
-
self.matrix = None # (n_val, n_models); higher is always better
|
|
30
|
-
self.models = None # ordered list of model names
|
|
31
|
-
self.task = task
|
|
32
|
-
|
|
33
|
-
def _compute_scores(self, y, preds):
|
|
34
|
-
"""
|
|
35
|
-
Return a 1D array of per-sample metric scores.
|
|
36
|
-
|
|
37
|
-
preds may be 1D (scalar predictions) or 2D (probability arrays, one
|
|
38
|
-
row per sample).
|
|
39
|
-
"""
|
|
40
|
-
preds = np.asarray(preds)
|
|
41
|
-
if preds.ndim == 2:
|
|
42
|
-
return np.array([self.metric(y[i], preds[i]) for i in range(len(y))])
|
|
43
|
-
return np.vectorize(self.metric)(y, preds)
|
|
44
|
-
|
|
45
|
-
def fit(self, features, y, preds_dict):
|
|
46
|
-
"""
|
|
47
|
-
Build the score matrix and fit the neighbor index.
|
|
48
|
-
|
|
49
|
-
This method expects pre-validated numpy arrays.
|
|
50
|
-
"""
|
|
51
|
-
self.models = list(preds_dict.keys())
|
|
52
|
-
n_val = len(y)
|
|
53
|
-
n_models = len(self.models)
|
|
54
|
-
self.matrix = np.zeros((n_val, n_models))
|
|
55
|
-
|
|
56
|
-
for j, name in enumerate(self.models):
|
|
57
|
-
scores = self._compute_scores(y, preds_dict[name])
|
|
58
|
-
self.matrix[:, j] = scores if self.mode == 'max' else -scores
|
|
59
|
-
|
|
60
|
-
self.model.fit(features)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|