deskit 0.3.0__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deskit-0.3.0/src/deskit.egg-info → deskit-1.0.0}/PKG-INFO +59 -49
- {deskit-0.3.0 → deskit-1.0.0}/README.md +58 -48
- {deskit-0.3.0 → deskit-1.0.0}/pyproject.toml +1 -1
- deskit-1.0.0/src/deskit/des/dewsiv.py +195 -0
- deskit-1.0.0/src/deskit/des/dewst.py +200 -0
- deskit-1.0.0/src/deskit/des/dewsv.py +185 -0
- deskit-1.0.0/src/deskit/des/lwsei.py +156 -0
- deskit-1.0.0/src/deskit/des/lwseu.py +133 -0
- {deskit-0.3.0 → deskit-1.0.0/src/deskit.egg-info}/PKG-INFO +59 -49
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit.egg-info/SOURCES.txt +5 -1
- deskit-0.3.0/src/deskit/analysis.py +0 -377
- {deskit-0.3.0 → deskit-1.0.0}/LICENSE +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/setup.cfg +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit/__init__.py +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit/_config.py +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit/base/__init__.py +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit/base/base.py +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit/base/knnbase.py +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit/des/__init__.py +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit/des/dewsi.py +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit/des/dewsu.py +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit/des/knorae.py +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit/des/knoraiu.py +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit/des/knorau.py +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit/des/ola.py +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit/metrics.py +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit/neighbors.py +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit/router.py +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit/utils.py +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit.egg-info/dependency_links.txt +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit.egg-info/requires.txt +0 -0
- {deskit-0.3.0 → deskit-1.0.0}/src/deskit.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deskit
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 1.0.0
|
|
4
4
|
Summary: A Python library for Dynamic Ensemble Selection
|
|
5
5
|
Author: Tikhon Vodyanov
|
|
6
6
|
License-Expression: MIT
|
|
@@ -95,7 +95,7 @@ NumPy (>= 1.21)
|
|
|
95
95
|
|
|
96
96
|
## Quick start
|
|
97
97
|
|
|
98
|
-
|
|
98
|
+
For a more detailed understanding of how to use the library, consult the [documentation](https://TikaaVo.github.io/deskit/).
|
|
99
99
|
|
|
100
100
|
```python
|
|
101
101
|
from deskit.des.knorau import KNORAU
|
|
@@ -150,14 +150,22 @@ weights = router.predict(X_test[i])
|
|
|
150
150
|
|
|
151
151
|
## Algorithms
|
|
152
152
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
|
157
|
-
|
|
158
|
-
| `
|
|
159
|
-
| `
|
|
160
|
-
| `
|
|
153
|
+
Full explanation of the algorithms, syntax, and parameters is available in the [documentation](https://TikaaVo.github.io/deskit/).
|
|
154
|
+
If you're struggling to decide on which algorithm to use, see the [algorithm selection guide](https://TikaaVo.github.io/deskit/selection).
|
|
155
|
+
|
|
156
|
+
| Method | Best for | Notes |
|
|
157
|
+
|------------|----------------|--------------------------------------------------------------------------------------------------------|
|
|
158
|
+
| `DEWS-U` | Regression | Softmax over neighborhood-averaged scores. Temperature controls sharpness. |
|
|
159
|
+
| `DEWS-I` | Regression | Like DEWS-U but scores are inverse-distance weighted. |
|
|
160
|
+
| `DEWS-T` | Both | Like DEWS-I but fits a weighted trend line over neighbor scores. |
|
|
161
|
+
| `DEWS-V` | Regression | Like DEWS-U but scores are variance-penalized. |
|
|
162
|
+
| `DEWS-IV` | Regression | Like DEWS-V but scores are also inverse-distance weighted. |
|
|
163
|
+
| `LWSE-U` | Both | Per-sample NNLS weight estimation over the local neighbourhood. |
|
|
164
|
+
| `LWSE-I` | Both | Like LWSE-U but rows are inverse-distance weighted. |
|
|
165
|
+
| `KNORA-U` | Classification | Each model earns one vote per neighbor it correctly classifies. |
|
|
166
|
+
| `KNORA-E` | Classification | Only models correct on all neighbors survive; falls back to smaller neighborhoods. |
|
|
167
|
+
| `KNORA-IU` | Classification | Like KNORA-U but votes are inverse-distance weighted. |
|
|
168
|
+
| `OLA` | Both | Hard selection: only the single best model in the neighborhood contributes. |
|
|
161
169
|
|
|
162
170
|
---
|
|
163
171
|
|
|
@@ -218,74 +226,76 @@ passed features either need to be run through a feature extractor beforehand, su
|
|
|
218
226
|
|
|
219
227
|
## Benchmark results
|
|
220
228
|
|
|
221
|
-
|
|
229
|
+
20-seed benchmark (seeds 0–19) on standard sklearn and OpenML datasets. "Best Single" is the best
|
|
222
230
|
individual model selected on the validation set. "Simple Average" is uniform
|
|
223
231
|
equal-weight blending, included as a baseline.
|
|
224
232
|
|
|
225
233
|
It is important to consider that these experiments were run with the default hyperparameters, meaning that
|
|
226
234
|
they could vary greatly with different values, and results could improve with tuning.
|
|
227
|
-
For a more detailed benchmark breakdown, see the [documentation](https://TikaaVo.github.io/deskit/).
|
|
235
|
+
For a more detailed benchmark breakdown, see the [benchmark in the documentation](https://TikaaVo.github.io/deskit/benchmark).
|
|
228
236
|
To see the full results, see `results.txt` in the `tests` folder.
|
|
229
237
|
|
|
230
|
-
Pool: KNN, Decision Tree, SVR, Ridge, Bayesian Ridge.
|
|
231
|
-
|
|
232
238
|
This pool was selected for having variability in architectures while avoiding a single dominant model.
|
|
233
239
|
|
|
234
|
-
deskit algorithms tested: OLA, DEWS-U, DEWS-I, KNORA-U, KNORA-E, KNORA-IU.
|
|
240
|
+
deskit algorithms tested: OLA, DEWS-U, DEWS-I, DEWS-T, DEWS-V, DEWS-IV, LWSE-U, LWSE-I, KNORA-U, KNORA-E, KNORA-IU.
|
|
235
241
|
|
|
236
242
|
### Regression (MAE, lower is better)
|
|
237
243
|
|
|
238
|
-
|
|
244
|
+
Pool: KNN, Decision Tree, SVR, Ridge, Bayesian Ridge.
|
|
245
|
+
|
|
246
|
+
% shown as delta vs Best Single. 20-seed mean.
|
|
239
247
|
|
|
240
|
-
| Dataset | Best Single | Simple Avg | deskit best
|
|
241
|
-
|
|
242
|
-
| California Housing (sklearn) | 0.
|
|
243
|
-
| Bike Sharing (OpenML) | 51.
|
|
244
|
-
| Abalone (OpenML) | **1.
|
|
245
|
-
| Diabetes (sklearn) | **44.
|
|
246
|
-
| Concrete Strength (OpenML) | 5.
|
|
248
|
+
| Dataset | Best Single | Simple Avg | deskit best |
|
|
249
|
+
|------------------------------|-------------|------------|---------------------------|
|
|
250
|
+
| California Housing (sklearn) | 0.3956 | +7.99% | **−2.54%** (DEWS-I) |
|
|
251
|
+
| Bike Sharing (OpenML) | 51.678 | +47.77% | **−6.86%** (DEWS-I) |
|
|
252
|
+
| Abalone (OpenML) | **1.4981** | +1.14% | +1.47% (KNORA-U/KNORA-IU) |
|
|
253
|
+
| Diabetes (sklearn) | **44.504** | +3.18% | +0.86% (DEWS-IV) |
|
|
254
|
+
| Concrete Strength (OpenML) | 5.2686 | +23.66% | **−5.41%** (LWSE-I) |
|
|
247
255
|
|
|
248
256
|
deskit beats best single and simple averaging on 3/5 regression datasets. This shows how DES can provide a
|
|
249
257
|
strong boost if used on the right dataset, but it might be counterproductive if used blindly.
|
|
250
258
|
|
|
251
259
|
KNORA variants are designed for classification, which explains the poor performance
|
|
252
|
-
on regression datasets; However, some
|
|
253
|
-
feature space is has hard clusters (like in Concrete Strength) or when the target is discrete
|
|
260
|
+
on regression datasets; however, some exceptions can occur in certain datasets when the target is discrete
|
|
254
261
|
and classification-like (like in Abalone).
|
|
255
262
|
|
|
263
|
+
DEWS-I and LWSE-I show the largest improvements on their respective datasets.
|
|
264
|
+
|
|
256
265
|
### Classification (Accuracy, higher is better)
|
|
257
266
|
|
|
258
|
-
|
|
267
|
+
Pool: KNN, Decision Tree, Gaussian NB, SVM-RBF, Logistic Regression.
|
|
268
|
+
|
|
269
|
+
% shown as delta vs Best Single. 20-seed mean.
|
|
259
270
|
|
|
260
|
-
| Dataset | Best Single | Simple Avg | deskit best
|
|
261
|
-
|
|
262
|
-
| HAR (OpenML) | 98.24% | −0.
|
|
263
|
-
| Yeast (OpenML) |
|
|
264
|
-
| Image Segment (OpenML) | 93.
|
|
265
|
-
| Waveform (OpenML) | **
|
|
266
|
-
| Vowel (OpenML) |
|
|
271
|
+
| Dataset | Best Single | Simple Avg | deskit best |
|
|
272
|
+
|------------------------|-------------|------------|--------------------------------|
|
|
273
|
+
| HAR (OpenML) | 98.24% | −0.33% | **+0.16%** (DEWS-T) |
|
|
274
|
+
| Yeast (OpenML) | 58.87% | +0.77% | **+1.66%** (KNORA-IU) |
|
|
275
|
+
| Image Segment (OpenML) | 93.70% | +1.40% | **+2.25%** (DEWS-T / DEWS-IV) |
|
|
276
|
+
| Waveform (OpenML) | **85.91%** | −0.98% | −0.39% (DEWS-T) |
|
|
277
|
+
| Vowel (OpenML) | 89.95% | −2.05% | **+2.95%** (LWSE-I) |
|
|
267
278
|
|
|
268
|
-
deskit beats or matches best single and simple averaging on 4/5 classification datasets.
|
|
269
|
-
can improve or hurt performance, so it must be used wisely, but if used correctly it can show promising results.
|
|
279
|
+
deskit beats or matches best single and simple averaging on 4/5 classification datasets.
|
|
270
280
|
|
|
271
281
|
### Speed (mean ms fit + predict, 20 seeds, all tested algorithms combined)
|
|
272
282
|
|
|
273
|
-
Consider that usually it is recommended to only use one algorithm at a time, this benchmark ran
|
|
274
|
-
same time, so with a single one runtime is expected to be about
|
|
283
|
+
Note that it is usually recommended to use only one algorithm at a time; this benchmark ran eleven of them at the
|
|
284
|
+
same time, so with a single one runtime is expected to be about 11x faster. For this benchmark, `preset='balanced'` was used,
|
|
275
285
|
so the backend was an ANN algorithm with FAISS IVF.
|
|
276
286
|
|
|
277
|
-
| Dataset | deskit
|
|
278
|
-
|
|
279
|
-
| California Housing |
|
|
280
|
-
| Bike Sharing |
|
|
281
|
-
| Abalone |
|
|
282
|
-
| Diabetes |
|
|
283
|
-
|
|
|
284
|
-
| HAR |
|
|
285
|
-
| Yeast |
|
|
286
|
-
| Image Segment |
|
|
287
|
-
| Waveform |
|
|
288
|
-
| Vowel |
|
|
287
|
+
| Dataset | deskit (11 algorithms) |
|
|
288
|
+
|--------------------|------------------------|
|
|
289
|
+
| California Housing | 351.0 ms |
|
|
290
|
+
| Bike Sharing | 283.5 ms |
|
|
291
|
+
| Abalone | 72.9 ms |
|
|
292
|
+
| Diabetes | 14.0 ms |
|
|
293
|
+
| Concrete Strength | 22.5 ms |
|
|
294
|
+
| HAR | 693.1 ms |
|
|
295
|
+
| Yeast | 44.7 ms |
|
|
296
|
+
| Image Segment | 69.9 ms |
|
|
297
|
+
| Waveform | 124.5 ms |
|
|
298
|
+
| Vowel | 39.0 ms |
|
|
289
299
|
|
|
290
300
|
deskit caches all model predictions on the validation set at fit time and reads
|
|
291
301
|
from that matrix at inference.
|
|
@@ -64,7 +64,7 @@ NumPy (>= 1.21)
|
|
|
64
64
|
|
|
65
65
|
## Quick start
|
|
66
66
|
|
|
67
|
-
|
|
67
|
+
For a more detailed understanding of how to use the library, consult the [documentation](https://TikaaVo.github.io/deskit/).
|
|
68
68
|
|
|
69
69
|
```python
|
|
70
70
|
from deskit.des.knorau import KNORAU
|
|
@@ -119,14 +119,22 @@ weights = router.predict(X_test[i])
|
|
|
119
119
|
|
|
120
120
|
## Algorithms
|
|
121
121
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
|
126
|
-
|
|
127
|
-
| `
|
|
128
|
-
| `
|
|
129
|
-
| `
|
|
122
|
+
Full explanation of the algorithms, syntax, and parameters is available in the [documentation](https://TikaaVo.github.io/deskit/).
|
|
123
|
+
If you're struggling to decide on which algorithm to use, see the [algorithm selection guide](https://TikaaVo.github.io/deskit/selection).
|
|
124
|
+
|
|
125
|
+
| Method | Best for | Notes |
|
|
126
|
+
|------------|----------------|--------------------------------------------------------------------------------------------------------|
|
|
127
|
+
| `DEWS-U` | Regression | Softmax over neighborhood-averaged scores. Temperature controls sharpness. |
|
|
128
|
+
| `DEWS-I` | Regression | Like DEWS-U but scores are inverse-distance weighted. |
|
|
129
|
+
| `DEWS-T` | Both | Like DEWS-I but fits a weighted trend line over neighbor scores. |
|
|
130
|
+
| `DEWS-V` | Regression | Like DEWS-U but scores are variance-penalized. |
|
|
131
|
+
| `DEWS-IV` | Regression | Like DEWS-V but scores are also inverse-distance weighted. |
|
|
132
|
+
| `LWSE-U` | Both | Per-sample NNLS weight estimation over the local neighbourhood. |
|
|
133
|
+
| `LWSE-I` | Both | Like LWSE-U but rows are inverse-distance weighted. |
|
|
134
|
+
| `KNORA-U` | Classification | Each model earns one vote per neighbor it correctly classifies. |
|
|
135
|
+
| `KNORA-E` | Classification | Only models correct on all neighbors survive; falls back to smaller neighborhoods. |
|
|
136
|
+
| `KNORA-IU` | Classification | Like KNORA-U but votes are inverse-distance weighted. |
|
|
137
|
+
| `OLA` | Both | Hard selection: only the single best model in the neighborhood contributes. |
|
|
130
138
|
|
|
131
139
|
---
|
|
132
140
|
|
|
@@ -187,74 +195,76 @@ passed features either need to be run through a feature extractor beforehand, su
|
|
|
187
195
|
|
|
188
196
|
## Benchmark results
|
|
189
197
|
|
|
190
|
-
|
|
198
|
+
20-seed benchmark (seeds 0–19) on standard sklearn and OpenML datasets. "Best Single" is the best
|
|
191
199
|
individual model selected on the validation set. "Simple Average" is uniform
|
|
192
200
|
equal-weight blending, included as a baseline.
|
|
193
201
|
|
|
194
202
|
It is important to consider that these experiments were run with the default hyperparameters, meaning that
|
|
195
203
|
they could vary greatly with different values, and results could improve with tuning.
|
|
196
|
-
For a more detailed benchmark breakdown, see the [documentation](https://TikaaVo.github.io/deskit/).
|
|
204
|
+
For a more detailed benchmark breakdown, see the [benchmark in the documentation](https://TikaaVo.github.io/deskit/benchmark).
|
|
197
205
|
To see the full results, see `results.txt` in the `tests` folder.
|
|
198
206
|
|
|
199
|
-
Pool: KNN, Decision Tree, SVR, Ridge, Bayesian Ridge.
|
|
200
|
-
|
|
201
207
|
This pool was selected for having variability in architectures while avoiding a single dominant model.
|
|
202
208
|
|
|
203
|
-
deskit algorithms tested: OLA, DEWS-U, DEWS-I, KNORA-U, KNORA-E, KNORA-IU.
|
|
209
|
+
deskit algorithms tested: OLA, DEWS-U, DEWS-I, DEWS-T, DEWS-V, DEWS-IV, LWSE-U, LWSE-I, KNORA-U, KNORA-E, KNORA-IU.
|
|
204
210
|
|
|
205
211
|
### Regression (MAE, lower is better)
|
|
206
212
|
|
|
207
|
-
|
|
213
|
+
Pool: KNN, Decision Tree, SVR, Ridge, Bayesian Ridge.
|
|
214
|
+
|
|
215
|
+
% shown as delta vs Best Single. 20-seed mean.
|
|
208
216
|
|
|
209
|
-
| Dataset | Best Single | Simple Avg | deskit best
|
|
210
|
-
|
|
211
|
-
| California Housing (sklearn) | 0.
|
|
212
|
-
| Bike Sharing (OpenML) | 51.
|
|
213
|
-
| Abalone (OpenML) | **1.
|
|
214
|
-
| Diabetes (sklearn) | **44.
|
|
215
|
-
| Concrete Strength (OpenML) | 5.
|
|
217
|
+
| Dataset | Best Single | Simple Avg | deskit best |
|
|
218
|
+
|------------------------------|-------------|------------|---------------------------|
|
|
219
|
+
| California Housing (sklearn) | 0.3956 | +7.99% | **−2.54%** (DEWS-I) |
|
|
220
|
+
| Bike Sharing (OpenML) | 51.678 | +47.77% | **−6.86%** (DEWS-I) |
|
|
221
|
+
| Abalone (OpenML) | **1.4981** | +1.14% | +1.47% (KNORA-U/KNORA-IU) |
|
|
222
|
+
| Diabetes (sklearn) | **44.504** | +3.18% | +0.86% (DEWS-IV) |
|
|
223
|
+
| Concrete Strength (OpenML) | 5.2686 | +23.66% | **−5.41%** (LWSE-I) |
|
|
216
224
|
|
|
217
225
|
deskit beats best single and simple averaging on 3/5 regression datasets. This shows how DES can provide a
|
|
218
226
|
strong boost if used on the right dataset, but it might be counterproductive if used blindly.
|
|
219
227
|
|
|
220
228
|
KNORA variants are designed for classification, which explains the poor performance
|
|
221
|
-
on regression datasets; However, some
|
|
222
|
-
feature space is has hard clusters (like in Concrete Strength) or when the target is discrete
|
|
229
|
+
on regression datasets; however, some exceptions can occur in certain datasets when the target is discrete
|
|
223
230
|
and classification-like (like in Abalone).
|
|
224
231
|
|
|
232
|
+
DEWS-I and LWSE-I show the largest improvements on their respective datasets.
|
|
233
|
+
|
|
225
234
|
### Classification (Accuracy, higher is better)
|
|
226
235
|
|
|
227
|
-
|
|
236
|
+
Pool: KNN, Decision Tree, Gaussian NB, SVM-RBF, Logistic Regression.
|
|
237
|
+
|
|
238
|
+
% shown as delta vs Best Single. 20-seed mean.
|
|
228
239
|
|
|
229
|
-
| Dataset | Best Single | Simple Avg | deskit best
|
|
230
|
-
|
|
231
|
-
| HAR (OpenML) | 98.24% | −0.
|
|
232
|
-
| Yeast (OpenML) |
|
|
233
|
-
| Image Segment (OpenML) | 93.
|
|
234
|
-
| Waveform (OpenML) | **
|
|
235
|
-
| Vowel (OpenML) |
|
|
240
|
+
| Dataset | Best Single | Simple Avg | deskit best |
|
|
241
|
+
|------------------------|-------------|------------|--------------------------------|
|
|
242
|
+
| HAR (OpenML) | 98.24% | −0.33% | **+0.16%** (DEWS-T) |
|
|
243
|
+
| Yeast (OpenML) | 58.87% | +0.77% | **+1.66%** (KNORA-IU) |
|
|
244
|
+
| Image Segment (OpenML) | 93.70% | +1.40% | **+2.25%** (DEWS-T / DEWS-IV) |
|
|
245
|
+
| Waveform (OpenML) | **85.91%** | −0.98% | −0.39% (DEWS-T) |
|
|
246
|
+
| Vowel (OpenML) | 89.95% | −2.05% | **+2.95%** (LWSE-I) |
|
|
236
247
|
|
|
237
|
-
deskit beats or matches best single and simple averaging on 4/5 classification datasets.
|
|
238
|
-
can improve or hurt performance, so it must be used wisely, but if used correctly it can show promising results.
|
|
248
|
+
deskit beats or matches best single and simple averaging on 4/5 classification datasets.
|
|
239
249
|
|
|
240
250
|
### Speed (mean ms fit + predict, 20 seeds, all tested algorithms combined)
|
|
241
251
|
|
|
242
|
-
Consider that usually it is recommended to only use one algorithm at a time, this benchmark ran
|
|
243
|
-
same time, so with a single one runtime is expected to be about
|
|
252
|
+
Note that it is usually recommended to use only one algorithm at a time; this benchmark ran eleven of them at the
|
|
253
|
+
same time, so with a single one runtime is expected to be about 11x faster. For this benchmark, `preset='balanced'` was used,
|
|
244
254
|
so the backend was an ANN algorithm with FAISS IVF.
|
|
245
255
|
|
|
246
|
-
| Dataset | deskit
|
|
247
|
-
|
|
248
|
-
| California Housing |
|
|
249
|
-
| Bike Sharing |
|
|
250
|
-
| Abalone |
|
|
251
|
-
| Diabetes |
|
|
252
|
-
|
|
|
253
|
-
| HAR |
|
|
254
|
-
| Yeast |
|
|
255
|
-
| Image Segment |
|
|
256
|
-
| Waveform |
|
|
257
|
-
| Vowel |
|
|
256
|
+
| Dataset | deskit (11 algorithms) |
|
|
257
|
+
|--------------------|------------------------|
|
|
258
|
+
| California Housing | 351.0 ms |
|
|
259
|
+
| Bike Sharing | 283.5 ms |
|
|
260
|
+
| Abalone | 72.9 ms |
|
|
261
|
+
| Diabetes | 14.0 ms |
|
|
262
|
+
| Concrete Strength | 22.5 ms |
|
|
263
|
+
| HAR | 693.1 ms |
|
|
264
|
+
| Yeast | 44.7 ms |
|
|
265
|
+
| Image Segment | 69.9 ms |
|
|
266
|
+
| Waveform | 124.5 ms |
|
|
267
|
+
| Vowel | 39.0 ms |
|
|
258
268
|
|
|
259
269
|
deskit caches all model predictions on the validation set at fit time and reads
|
|
260
270
|
from that matrix at inference.
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DEWS-IV: Distance-weighted Ensemble with Softmax — Inverse-distance + Variance-penalised.
|
|
3
|
+
"""
|
|
4
|
+
from deskit.base.knnbase import KNNBase
|
|
5
|
+
from deskit._config import make_finder, resolve_metric, prep_fit_inputs
|
|
6
|
+
from deskit.utils import to_numpy
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Metric names for which DEWSIV computes its variance term from signed
# residuals (y_true - y_pred) instead of from the metric score matrix.
_SIGNED_METRICS = {'mae', 'mse'}
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _signed_residual(y_true, y_pred):
|
|
14
|
+
return float(y_true) - float(y_pred)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class DEWSIV(KNNBase):
    """
    DEWS-IV: Distance-weighted Ensemble with Softmax — Inverse-distance + Variance-penalised.

    Combines DEWS-I and DEWS-V. The mean score is inverse-distance weighted
    (closer neighbours contribute more), and the variance penalty is also
    computed with the same inverse-distance weights, so erratic behaviour
    close to the test point is penalised more heavily than erratic behaviour
    among distant neighbours.

    Both scores and variance are normalised to [0, 1] within each neighbourhood
    before the penalty is applied, making the adjustment dimensionless and
    consistent regardless of metric scale or task type.

    For MAE and MSE, variance is computed from signed residuals
    (y_true - y_pred) rather than raw metric values, so that a model
    oscillating between equal positive and negative errors is correctly
    identified as inconsistent. The mean score used for routing still comes
    from the standard metric (MAE/MSE), only the variance term uses signed
    residuals.

    For all other metrics, variance is computed directly from the score matrix.

    Parameters
    ----------
    task : str
        'classification' or 'regression'.
    metric : str or callable
        Scoring function. 'mae' or 'mse' activate signed-residual variance;
        all other metrics use the score matrix directly for variance.
    mode : str
        'max' if higher scores are better, 'min' if lower.
    k : int
        Neighbourhood size. Default: 10.
    threshold : float
        Competence gate. After per-neighbourhood normalisation (best=1.0,
        worst=0.0), models below this fraction are excluded from softmax.
        0.0 disables the gate; 1.0 reduces to OLA behaviour. Default: 0.5.
    temperature : float, optional
        Softmax sharpness. Lower = sharper routing toward the local best model.
        Defaults to 0.1 for min-metrics, 1.0 otherwise.
    preset : str
        Neighbour search preset. Default: 'balanced'. See list_presets().
    **kwargs
        Forwarded unchanged to ``make_finder`` together with ``preset`` and ``k``.
    """

    def __init__(self, task, metric='mae', mode='min', k=10,
                 threshold=0.5, temperature=None, preset='balanced', **kwargs):
        metric_name, metric_fn = resolve_metric(metric)
        finder = make_finder(preset, k, **kwargs)

        # Remember whether the variance term should use signed residuals and
        # keep the resolved metric name for input preparation in fit().
        self._use_signed = metric_name in _SIGNED_METRICS
        self._metric_name = metric_name

        super().__init__(metric=metric_fn, mode=mode, neighbor_finder=finder)

        self.task = task
        self.threshold = threshold
        self._temperature = temperature
        self._var_matrix = None  # (n_val, n_models) signed residuals, MAE/MSE only

    @staticmethod
    def _unit_scale(values):
        """
        Min-max scale every row of ``values`` to [0, 1].

        Rows whose entries are all equal have zero range; the divisor is
        replaced by 1.0 for them, so such rows come out as all zeros instead
        of dividing by zero.
        """
        low = values.min(axis=1, keepdims=True)
        span = values.max(axis=1, keepdims=True) - low
        return (values - low) / np.where(span > 0, span, 1.0)

    def fit(self, features, y, preds_dict):
        """
        Fit the routing model on validation data.

        Parameters
        ----------
        features : array-like, shape (n_val, n_features)
            Validation features. Must not overlap with train or test data.
        y : array-like, shape (n_val,)
            Validation ground-truth labels or values.
        preds_dict : dict[str, array-like]
            Validation predictions keyed by model name.
        """
        features, y, preds_dict = prep_fit_inputs(
            features, y, preds_dict, self._metric_name
        )
        super().fit(features, y, preds_dict)

        # Build signed residual matrix for variance computation (MAE/MSE only).
        # A plain array subtraction replaces np.vectorize, which is a
        # Python-level loop and raises on size-0 input.
        if self._use_signed:
            targets = np.asarray(y, dtype=float)
            self._var_matrix = np.zeros((len(y), len(self.models)))
            for j, name in enumerate(self.models):
                self._var_matrix[:, j] = targets - np.asarray(preds_dict[name], dtype=float)

    def predict(self, x, temperature=None, threshold=None):
        """
        Return per-sample model weights.

        Parameters
        ----------
        x : array-like, shape (n_features,) or (n_samples, n_features)
        temperature : float, optional
            Overrides the instance temperature for this call.
        threshold : float, optional
            Overrides the instance threshold for this call.

        Returns
        -------
        dict or list of dict
            Single sample: {model_name: weight}. Batch: list of such dicts.
            A 2-D input with exactly one row is also returned as a single dict.
        """
        # Resolution order: per-call override, instance setting, mode default.
        if temperature is not None:
            t = temperature
        elif self._temperature is not None:
            t = self._temperature
        else:
            t = 0.1 if self.mode == 'min' else 1.0
        th = threshold if threshold is not None else self.threshold

        x = np.atleast_2d(to_numpy(x))
        batch_size = x.shape[0]

        distances, indices = self.model.kneighbors(x)  # both (batch, k)

        # Inverse-distance weights — same as DEWS-I. Distances are floored at
        # 1e-8 so an exact match does not divide by zero.
        inv_dist = 1.0 / np.maximum(distances, 1e-8)                  # (batch, k)
        inv_dist_w = inv_dist / inv_dist.sum(axis=1, keepdims=True)   # normalised, (batch, k)
        w = inv_dist_w[:, :, np.newaxis]                              # (batch, k, 1)

        # Inverse-distance weighted mean of each model's scores over K neighbours.
        # NOTE(review): the normalisation below maps the highest averaged score
        # to 1.0; this presumably relies on the base class orienting the score
        # matrix so that higher is better — confirm against KNNBase.fit.
        neighbor_scores = self.matrix[indices]                        # (batch, k, n_models)
        avg_scores = (neighbor_scores * w).sum(axis=1)                # (batch, n_models)

        # Select source matrix for variance computation.
        if self._use_signed:
            var_source = self._var_matrix[indices]                    # (batch, k, n_models)
        else:
            var_source = neighbor_scores

        # Inverse-distance weighted variance: σ²_w = Σ w_i * (x_i - μ_w)²
        # For signed residuals the mean is computed from var_source, not avg_scores,
        # so that the variance is internally consistent with its own mean.
        var_mean = (var_source * w).sum(axis=1)                       # (batch, n_models)
        residuals = var_source - var_mean[:, np.newaxis, :]           # (batch, k, n_models)
        local_var = (w * residuals ** 2).sum(axis=1)                  # (batch, n_models)

        # Normalise scores and variance to [0, 1] across models within each
        # sample so the penalty is dimensionless and scale-independent.
        norm_scores = self._unit_scale(avg_scores)
        norm_var = self._unit_scale(local_var)

        # Penalise inconsistent models: divide normalised score by (1 + normalised variance).
        norm_scores = norm_scores / (1.0 + norm_var)

        # Re-normalise after penalty so the gate threshold remains meaningful.
        norm_scores = self._unit_scale(norm_scores)

        # Zero out models below threshold. If no model passes, fall back to the
        # model(s) whose normalised score is exactly 1.0.
        if th > 0:
            gate = norm_scores >= th
            any_pass = gate.any(axis=1, keepdims=True)
            gate = np.where(any_pass, gate, norm_scores == 1.0)
            norm_scores = norm_scores * gate

        # Softmax, shifted by the row maximum for numerical stability.
        max_scores = norm_scores.max(axis=1, keepdims=True)
        exp_scores = np.exp((norm_scores - max_scores) / t)
        if th > 0:
            exp_scores = exp_scores * gate
        total = exp_scores.sum(axis=1, keepdims=True)
        # Rows where every model was gated out sum to zero; give those uniform weights.
        weights = np.where(total > 0,
                           exp_scores / np.where(total > 0, total, 1.0),
                           np.full_like(exp_scores, 1.0 / len(self.models)))

        if batch_size == 1:
            return dict(zip(self.models, weights[0]))
        return [dict(zip(self.models, row)) for row in weights]
|