deskit 0.3.0__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. {deskit-0.3.0/src/deskit.egg-info → deskit-1.0.0}/PKG-INFO +59 -49
  2. {deskit-0.3.0 → deskit-1.0.0}/README.md +58 -48
  3. {deskit-0.3.0 → deskit-1.0.0}/pyproject.toml +1 -1
  4. deskit-1.0.0/src/deskit/des/dewsiv.py +195 -0
  5. deskit-1.0.0/src/deskit/des/dewst.py +200 -0
  6. deskit-1.0.0/src/deskit/des/dewsv.py +185 -0
  7. deskit-1.0.0/src/deskit/des/lwsei.py +156 -0
  8. deskit-1.0.0/src/deskit/des/lwseu.py +133 -0
  9. {deskit-0.3.0 → deskit-1.0.0/src/deskit.egg-info}/PKG-INFO +59 -49
  10. {deskit-0.3.0 → deskit-1.0.0}/src/deskit.egg-info/SOURCES.txt +5 -1
  11. deskit-0.3.0/src/deskit/analysis.py +0 -377
  12. {deskit-0.3.0 → deskit-1.0.0}/LICENSE +0 -0
  13. {deskit-0.3.0 → deskit-1.0.0}/setup.cfg +0 -0
  14. {deskit-0.3.0 → deskit-1.0.0}/src/deskit/__init__.py +0 -0
  15. {deskit-0.3.0 → deskit-1.0.0}/src/deskit/_config.py +0 -0
  16. {deskit-0.3.0 → deskit-1.0.0}/src/deskit/base/__init__.py +0 -0
  17. {deskit-0.3.0 → deskit-1.0.0}/src/deskit/base/base.py +0 -0
  18. {deskit-0.3.0 → deskit-1.0.0}/src/deskit/base/knnbase.py +0 -0
  19. {deskit-0.3.0 → deskit-1.0.0}/src/deskit/des/__init__.py +0 -0
  20. {deskit-0.3.0 → deskit-1.0.0}/src/deskit/des/dewsi.py +0 -0
  21. {deskit-0.3.0 → deskit-1.0.0}/src/deskit/des/dewsu.py +0 -0
  22. {deskit-0.3.0 → deskit-1.0.0}/src/deskit/des/knorae.py +0 -0
  23. {deskit-0.3.0 → deskit-1.0.0}/src/deskit/des/knoraiu.py +0 -0
  24. {deskit-0.3.0 → deskit-1.0.0}/src/deskit/des/knorau.py +0 -0
  25. {deskit-0.3.0 → deskit-1.0.0}/src/deskit/des/ola.py +0 -0
  26. {deskit-0.3.0 → deskit-1.0.0}/src/deskit/metrics.py +0 -0
  27. {deskit-0.3.0 → deskit-1.0.0}/src/deskit/neighbors.py +0 -0
  28. {deskit-0.3.0 → deskit-1.0.0}/src/deskit/router.py +0 -0
  29. {deskit-0.3.0 → deskit-1.0.0}/src/deskit/utils.py +0 -0
  30. {deskit-0.3.0 → deskit-1.0.0}/src/deskit.egg-info/dependency_links.txt +0 -0
  31. {deskit-0.3.0 → deskit-1.0.0}/src/deskit.egg-info/requires.txt +0 -0
  32. {deskit-0.3.0 → deskit-1.0.0}/src/deskit.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deskit
3
- Version: 0.3.0
3
+ Version: 1.0.0
4
4
  Summary: A Python library for Dynamic Ensemble Selection
5
5
  Author: Tikhon Vodyanov
6
6
  License-Expression: MIT
@@ -95,7 +95,7 @@ NumPy (>= 1.21)
95
95
 
96
96
  ## Quick start
97
97
 
98
- Full explanation of the algorithms, syntax, and parameters is available in the [documentation](https://TikaaVo.github.io/deskit/).
98
+ For a more detailed understanding of how to use the library, consult the [documentation](https://TikaaVo.github.io/deskit/).
99
99
 
100
100
  ```python
101
101
  from deskit.des.knorau import KNORAU
@@ -150,14 +150,22 @@ weights = router.predict(X_test[i])
150
150
 
151
151
  ## Algorithms
152
152
 
153
- | Method | Best for | Notes |
154
- |-----------|---|----------------------------------------------------------------------------------------------------------|
155
- | `DEWSU` | Regression | Softmax over neighbourhood-averaged scores. Temperature controls sharpness. |
156
- | `DEWSI` | Regression | Like DEWS-U but scores are inverse-distance weighted. |
157
- | `KNORAU` | Classification | Vote-count weighting. Each model earns one vote per neighbour it correctly classifies. |
158
- | `KNORAE` | Classification | Intersection-based. Only models correct on all neighbours survive; falls back to smaller neighbourhoods. |
159
- | `KNORAIU` | Classification | Like KNORA-U but votes are inverse-distance weighted. |
160
- | `OLA` | Both | Hard selection: only the single best model in the neighbourhood contributes. |
153
+ Full explanation of the algorithms, syntax, and parameters is available in the [documentation](https://TikaaVo.github.io/deskit/).
154
+ If you're struggling to decide on which algorithm to use, see the [algorithm selection guide](https://TikaaVo.github.io/deskit/selection).
155
+
156
+ | Method | Best for | Notes |
157
+ |------------|----------------|--------------------------------------------------------------------------------------------------------|
158
+ | `DEWS-U` | Regression | Softmax over neighborhood-averaged scores. Temperature controls sharpness. |
159
+ | `DEWS-I` | Regression | Like DEWS-U but scores are inverse-distance weighted. |
160
+ | `DEWS-T` | Both | Like DEWS-I but fits a weighted trend line over neighbor scores. |
161
+ | `DEWS-V` | Regression | Like DEWS-U but scores are variance-penalized. |
162
+ | `DEWS-IV` | Regression | Like DEWS-V but scores are also inverse-distance weighted. |
163
+ | `LWSE-U` | Both | Per-sample NNLS weight estimation over the local neighborhood. |
164
+ | `LWSE-I` | Both | Like LWSE-U but rows are inverse-distance weighted. |
165
+ | `KNORA-U` | Classification | Each model earns one vote per neighbor it correctly classifies. |
166
+ | `KNORA-E` | Classification | Only models correct on all neighbors survive; falls back to smaller neighborhoods. |
167
+ | `KNORA-IU` | Classification | Like KNORA-U but votes are inverse-distance weighted. |
168
+ | `OLA` | Both | Hard selection: only the single best model in the neighborhood contributes. |
161
169
 
162
170
  ---
163
171
 
@@ -218,74 +226,76 @@ passed features either need to be run through a feature extractor beforehand, su
218
226
 
219
227
  ## Benchmark results
220
228
 
221
- 100-seed benchmark (seeds 0–99) on standard sklearn and OpenML datasets. "Best Single" is the best
229
+ 20-seed benchmark (seeds 0–19) on standard sklearn and OpenML datasets. "Best Single" is the best
222
230
  individual model selected on the validation set. "Simple Average" is uniform
223
231
  equal-weight blending, included as a baseline.
224
232
 
225
233
  It is important to consider that these experiments were run with the default hyperparameters, meaning that
226
234
  they could vary greatly with different values, and results could improve with tuning.
227
- For a more detailed benchmark breakdown, see the [documentation](https://TikaaVo.github.io/deskit/).
235
+ For a more detailed benchmark breakdown, see the [benchmark in the documentation](https://TikaaVo.github.io/deskit/benchmark).
228
236
  To see the full results, see `results.txt` in the `tests` folder.
229
237
 
230
- Pool: KNN, Decision Tree, SVR, Ridge, Bayesian Ridge.
231
-
232
238
  This pool was selected for having variability in architectures while avoiding a single dominant model.
233
239
 
234
- deskit algorithms tested: OLA, DEWS-U, DEWS-I, KNORA-U, KNORA-E, KNORA-IU.
240
+ deskit algorithms tested: OLA, DEWS-U, DEWS-I, DEWS-T, DEWS-V, DEWS-IV, LWSE-U, LWSE-I, KNORA-U, KNORA-E, KNORA-IU.
235
241
 
236
242
  ### Regression (MAE, lower is better)
237
243
 
238
- % shown as delta vs Best Single. 100-seed mean.
244
+ Pool: KNN, Decision Tree, SVR, Ridge, Bayesian Ridge.
245
+
246
+ % shown as delta vs Best Single. 20-seed mean.
239
247
 
240
- | Dataset | Best Single | Simple Avg | deskit best |
241
- |------------------------------|-------------|------------|-------------------------|
242
- | California Housing (sklearn) | 0.3955 | +7.93% | **−2.68%** (DEWS-I) |
243
- | Bike Sharing (OpenML) | 51.604 | +48.39% | **−6.25%** (DEWS-I) |
244
- | Abalone (OpenML) | **1.4923** | +1.29% | +1.61% (KNORA-IU) |
245
- | Diabetes (sklearn) | **44.986** | +2.98% | +0.88% (DEWS-I) |
246
- | Concrete Strength (OpenML) | 5.3934 | +21.30% | **−2.85%** (KNORA-IU) |
248
+ | Dataset | Best Single | Simple Avg | deskit best |
249
+ |------------------------------|-------------|------------|---------------------------|
250
+ | California Housing (sklearn) | 0.3956 | +7.99% | **−2.54%** (DEWS-I) |
251
+ | Bike Sharing (OpenML) | 51.678 | +47.77% | **−6.86%** (DEWS-I) |
252
+ | Abalone (OpenML) | **1.4981** | +1.14% | +1.47% (KNORA-U/KNORA-IU) |
253
+ | Diabetes (sklearn) | **44.504** | +3.18% | +0.86% (DEWS-IV) |
254
+ | Concrete Strength (OpenML) | 5.2686 | +23.66% | **−5.41%** (LWSE-I) |
247
255
 
248
256
  deskit beats best single and simple averaging on 3/5 regression datasets. This shows how DES can provide a
249
257
  strong boost if used on the right dataset, but it might be counterproductive if used blindly.
250
258
 
251
259
  KNORA variants are designed for classification, which explains the poor performance
252
- on regression datasets; However, some exception can occur in certain datasets, either where
253
- feature space is has hard clusters (like in Concrete Strength) or when the target is discrete
260
+ on regression datasets; however, some exceptions can occur in certain datasets when the target is discrete
254
261
  and classification-like (like in Abalone).
255
262
 
263
+ DEWS-I and LWSE-I show the largest improvements on their respective datasets.
264
+
256
265
  ### Classification (Accuracy, higher is better)
257
266
 
258
- % shown as delta vs Best Single. 100-seed mean.
267
+ Pool: KNN, Decision Tree, Gaussian NB, SVM-RBF, Logistic Regression.
268
+
269
+ % shown as delta vs Best Single. 20-seed mean.
259
270
 
260
- | Dataset | Best Single | Simple Avg | deskit best |
261
- |------------------------|-------------|------------|-------------------------|
262
- | HAR (OpenML) | 98.24% | −0.32% | **+0.14%** (DEWS-I) |
263
- | Yeast (OpenML) | 59.19% | +0.46% | **+1.48%** (KNORA-IU) |
264
- | Image Segment (OpenML) | 93.65% | +1.70% | **+2.33%** (KNORA-IU) |
265
- | Waveform (OpenML) | **86.28%** | −1.04% | −0.55% (DEWS-I) |
266
- | Vowel (OpenML) | 90.54% | −1.81% | **+0.93%** (KNORA-IU) |
271
+ | Dataset | Best Single | Simple Avg | deskit best |
272
+ |------------------------|-------------|------------|--------------------------------|
273
+ | HAR (OpenML) | 98.24% | −0.33% | **+0.16%** (DEWS-T) |
274
+ | Yeast (OpenML) | 58.87% | +0.77% | **+1.66%** (KNORA-IU) |
275
+ | Image Segment (OpenML) | 93.70% | +1.40% | **+2.25%** (DEWS-T / DEWS-IV) |
276
+ | Waveform (OpenML) | **85.91%** | −0.98% | −0.39% (DEWS-T) |
277
+ | Vowel (OpenML) | 89.95% | −2.05% | **+2.95%** (LWSE-I) |
267
278
 
268
- deskit beats or matches best single and simple averaging on 4/5 classification datasets. As seen on regression, DES
269
- can improve or hurt performance, so it must be used wisely, but if used correctly it can show promising results.
279
+ deskit beats or matches best single and simple averaging on 4/5 classification datasets.
270
280
 
271
281
  ### Speed (mean ms fit + predict, 20 seeds, all tested algorithms combined)
272
282
 
273
- Consider that usually it is recommended to only use one algorithm at a time, this benchmark ran six of them at the
274
- same time, so with a single one runtime is expected to be about 6x faster. For this benchmark, `preset='balanced'` was used,
283
+ Consider that it is usually recommended to use only one algorithm at a time; this benchmark ran eleven of them at the
284
+ same time, so with a single one runtime is expected to be about 11x faster. For this benchmark, `preset='balanced'` was used,
275
285
  so the backend was an ANN algorithm with FAISS IVF.
276
286
 
277
- | Dataset | deskit |
278
- |--------------------|-----------|
279
- | California Housing | 159.8 ms |
280
- | Bike Sharing | 130.3 ms |
281
- | Abalone | 32.9 ms |
282
- | Diabetes | 8.2 ms |
283
- | Conrete Strength | 10.8 ms |
284
- | HAR | 352.0 ms |
285
- | Yeast | 18.6 ms |
286
- | Image Segment | 32.4 ms |
287
- | Waveform | 58.7 ms |
288
- | Vowel | 19.6 ms |
287
+ | Dataset | deskit (11 algorithms) |
288
+ |--------------------|------------------------|
289
+ | California Housing | 351.0 ms |
290
+ | Bike Sharing | 283.5 ms |
291
+ | Abalone | 72.9 ms |
292
+ | Diabetes | 14.0 ms |
293
+ | Concrete Strength | 22.5 ms |
294
+ | HAR | 693.1 ms |
295
+ | Yeast | 44.7 ms |
296
+ | Image Segment | 69.9 ms |
297
+ | Waveform | 124.5 ms |
298
+ | Vowel | 39.0 ms |
289
299
 
290
300
  deskit caches all model predictions on the validation set at fit time and reads
291
301
  from that matrix at inference.
@@ -64,7 +64,7 @@ NumPy (>= 1.21)
64
64
 
65
65
  ## Quick start
66
66
 
67
- Full explanation of the algorithms, syntax, and parameters is available in the [documentation](https://TikaaVo.github.io/deskit/).
67
+ For a more detailed understanding of how to use the library, consult the [documentation](https://TikaaVo.github.io/deskit/).
68
68
 
69
69
  ```python
70
70
  from deskit.des.knorau import KNORAU
@@ -119,14 +119,22 @@ weights = router.predict(X_test[i])
119
119
 
120
120
  ## Algorithms
121
121
 
122
- | Method | Best for | Notes |
123
- |-----------|---|----------------------------------------------------------------------------------------------------------|
124
- | `DEWSU` | Regression | Softmax over neighbourhood-averaged scores. Temperature controls sharpness. |
125
- | `DEWSI` | Regression | Like DEWS-U but scores are inverse-distance weighted. |
126
- | `KNORAU` | Classification | Vote-count weighting. Each model earns one vote per neighbour it correctly classifies. |
127
- | `KNORAE` | Classification | Intersection-based. Only models correct on all neighbours survive; falls back to smaller neighbourhoods. |
128
- | `KNORAIU` | Classification | Like KNORA-U but votes are inverse-distance weighted. |
129
- | `OLA` | Both | Hard selection: only the single best model in the neighbourhood contributes. |
122
+ Full explanation of the algorithms, syntax, and parameters is available in the [documentation](https://TikaaVo.github.io/deskit/).
123
+ If you're struggling to decide on which algorithm to use, see the [algorithm selection guide](https://TikaaVo.github.io/deskit/selection).
124
+
125
+ | Method | Best for | Notes |
126
+ |------------|----------------|--------------------------------------------------------------------------------------------------------|
127
+ | `DEWS-U` | Regression | Softmax over neighborhood-averaged scores. Temperature controls sharpness. |
128
+ | `DEWS-I` | Regression | Like DEWS-U but scores are inverse-distance weighted. |
129
+ | `DEWS-T` | Both | Like DEWS-I but fits a weighted trend line over neighbor scores. |
130
+ | `DEWS-V` | Regression | Like DEWS-U but scores are variance-penalized. |
131
+ | `DEWS-IV` | Regression | Like DEWS-V but scores are also inverse-distance weighted. |
132
+ | `LWSE-U` | Both | Per-sample NNLS weight estimation over the local neighborhood. |
133
+ | `LWSE-I` | Both | Like LWSE-U but rows are inverse-distance weighted. |
134
+ | `KNORA-U` | Classification | Each model earns one vote per neighbor it correctly classifies. |
135
+ | `KNORA-E` | Classification | Only models correct on all neighbors survive; falls back to smaller neighborhoods. |
136
+ | `KNORA-IU` | Classification | Like KNORA-U but votes are inverse-distance weighted. |
137
+ | `OLA` | Both | Hard selection: only the single best model in the neighborhood contributes. |
130
138
 
131
139
  ---
132
140
 
@@ -187,74 +195,76 @@ passed features either need to be run through a feature extractor beforehand, su
187
195
 
188
196
  ## Benchmark results
189
197
 
190
- 100-seed benchmark (seeds 0–99) on standard sklearn and OpenML datasets. "Best Single" is the best
198
+ 20-seed benchmark (seeds 0–19) on standard sklearn and OpenML datasets. "Best Single" is the best
191
199
  individual model selected on the validation set. "Simple Average" is uniform
192
200
  equal-weight blending, included as a baseline.
193
201
 
194
202
  It is important to consider that these experiments were run with the default hyperparameters, meaning that
195
203
  they could vary greatly with different values, and results could improve with tuning.
196
- For a more detailed benchmark breakdown, see the [documentation](https://TikaaVo.github.io/deskit/).
204
+ For a more detailed benchmark breakdown, see the [benchmark in the documentation](https://TikaaVo.github.io/deskit/benchmark).
197
205
  To see the full results, see `results.txt` in the `tests` folder.
198
206
 
199
- Pool: KNN, Decision Tree, SVR, Ridge, Bayesian Ridge.
200
-
201
207
  This pool was selected for having variability in architectures while avoiding a single dominant model.
202
208
 
203
- deskit algorithms tested: OLA, DEWS-U, DEWS-I, KNORA-U, KNORA-E, KNORA-IU.
209
+ deskit algorithms tested: OLA, DEWS-U, DEWS-I, DEWS-T, DEWS-V, DEWS-IV, LWSE-U, LWSE-I, KNORA-U, KNORA-E, KNORA-IU.
204
210
 
205
211
  ### Regression (MAE, lower is better)
206
212
 
207
- % shown as delta vs Best Single. 100-seed mean.
213
+ Pool: KNN, Decision Tree, SVR, Ridge, Bayesian Ridge.
214
+
215
+ % shown as delta vs Best Single. 20-seed mean.
208
216
 
209
- | Dataset | Best Single | Simple Avg | deskit best |
210
- |------------------------------|-------------|------------|-------------------------|
211
- | California Housing (sklearn) | 0.3955 | +7.93% | **−2.68%** (DEWS-I) |
212
- | Bike Sharing (OpenML) | 51.604 | +48.39% | **−6.25%** (DEWS-I) |
213
- | Abalone (OpenML) | **1.4923** | +1.29% | +1.61% (KNORA-IU) |
214
- | Diabetes (sklearn) | **44.986** | +2.98% | +0.88% (DEWS-I) |
215
- | Concrete Strength (OpenML) | 5.3934 | +21.30% | **−2.85%** (KNORA-IU) |
217
+ | Dataset | Best Single | Simple Avg | deskit best |
218
+ |------------------------------|-------------|------------|---------------------------|
219
+ | California Housing (sklearn) | 0.3956 | +7.99% | **−2.54%** (DEWS-I) |
220
+ | Bike Sharing (OpenML) | 51.678 | +47.77% | **−6.86%** (DEWS-I) |
221
+ | Abalone (OpenML) | **1.4981** | +1.14% | +1.47% (KNORA-U/KNORA-IU) |
222
+ | Diabetes (sklearn) | **44.504** | +3.18% | +0.86% (DEWS-IV) |
223
+ | Concrete Strength (OpenML) | 5.2686 | +23.66% | **−5.41%** (LWSE-I) |
216
224
 
217
225
  deskit beats best single and simple averaging on 3/5 regression datasets. This shows how DES can provide a
218
226
  strong boost if used on the right dataset, but it might be counterproductive if used blindly.
219
227
 
220
228
  KNORA variants are designed for classification, which explains the poor performance
221
- on regression datasets; However, some exception can occur in certain datasets, either where
222
- feature space is has hard clusters (like in Concrete Strength) or when the target is discrete
229
+ on regression datasets; however, some exceptions can occur in certain datasets when the target is discrete
223
230
  and classification-like (like in Abalone).
224
231
 
232
+ DEWS-I and LWSE-I show the largest improvements on their respective datasets.
233
+
225
234
  ### Classification (Accuracy, higher is better)
226
235
 
227
- % shown as delta vs Best Single. 100-seed mean.
236
+ Pool: KNN, Decision Tree, Gaussian NB, SVM-RBF, Logistic Regression.
237
+
238
+ % shown as delta vs Best Single. 20-seed mean.
228
239
 
229
- | Dataset | Best Single | Simple Avg | deskit best |
230
- |------------------------|-------------|------------|-------------------------|
231
- | HAR (OpenML) | 98.24% | −0.32% | **+0.14%** (DEWS-I) |
232
- | Yeast (OpenML) | 59.19% | +0.46% | **+1.48%** (KNORA-IU) |
233
- | Image Segment (OpenML) | 93.65% | +1.70% | **+2.33%** (KNORA-IU) |
234
- | Waveform (OpenML) | **86.28%** | −1.04% | −0.55% (DEWS-I) |
235
- | Vowel (OpenML) | 90.54% | −1.81% | **+0.93%** (KNORA-IU) |
240
+ | Dataset | Best Single | Simple Avg | deskit best |
241
+ |------------------------|-------------|------------|--------------------------------|
242
+ | HAR (OpenML) | 98.24% | −0.33% | **+0.16%** (DEWS-T) |
243
+ | Yeast (OpenML) | 58.87% | +0.77% | **+1.66%** (KNORA-IU) |
244
+ | Image Segment (OpenML) | 93.70% | +1.40% | **+2.25%** (DEWS-T / DEWS-IV) |
245
+ | Waveform (OpenML) | **85.91%** | −0.98% | −0.39% (DEWS-T) |
246
+ | Vowel (OpenML) | 89.95% | −2.05% | **+2.95%** (LWSE-I) |
236
247
 
237
- deskit beats or matches best single and simple averaging on 4/5 classification datasets. As seen on regression, DES
238
- can improve or hurt performance, so it must be used wisely, but if used correctly it can show promising results.
248
+ deskit beats or matches best single and simple averaging on 4/5 classification datasets.
239
249
 
240
250
  ### Speed (mean ms fit + predict, 20 seeds, all tested algorithms combined)
241
251
 
242
- Consider that usually it is recommended to only use one algorithm at a time, this benchmark ran six of them at the
243
- same time, so with a single one runtime is expected to be about 6x faster. For this benchmark, `preset='balanced'` was used,
252
+ Consider that it is usually recommended to use only one algorithm at a time; this benchmark ran eleven of them at the
253
+ same time, so with a single one runtime is expected to be about 11x faster. For this benchmark, `preset='balanced'` was used,
244
254
  so the backend was an ANN algorithm with FAISS IVF.
245
255
 
246
- | Dataset | deskit |
247
- |--------------------|-----------|
248
- | California Housing | 159.8 ms |
249
- | Bike Sharing | 130.3 ms |
250
- | Abalone | 32.9 ms |
251
- | Diabetes | 8.2 ms |
252
- | Conrete Strength | 10.8 ms |
253
- | HAR | 352.0 ms |
254
- | Yeast | 18.6 ms |
255
- | Image Segment | 32.4 ms |
256
- | Waveform | 58.7 ms |
257
- | Vowel | 19.6 ms |
256
+ | Dataset | deskit (11 algorithms) |
257
+ |--------------------|------------------------|
258
+ | California Housing | 351.0 ms |
259
+ | Bike Sharing | 283.5 ms |
260
+ | Abalone | 72.9 ms |
261
+ | Diabetes | 14.0 ms |
262
+ | Concrete Strength | 22.5 ms |
263
+ | HAR | 693.1 ms |
264
+ | Yeast | 44.7 ms |
265
+ | Image Segment | 69.9 ms |
266
+ | Waveform | 124.5 ms |
267
+ | Vowel | 39.0 ms |
258
268
 
259
269
  deskit caches all model predictions on the validation set at fit time and reads
260
270
  from that matrix at inference.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "deskit"
7
- version = "0.3.0"
7
+ version = "1.0.0"
8
8
  description = "A Python library for Dynamic Ensemble Selection"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -0,0 +1,195 @@
1
+ """
2
+ DEWS-IV: Distance-weighted Ensemble with Softmax — Inverse-distance + Variance-penalised.
3
+ """
4
+ from deskit.base.knnbase import KNNBase
5
+ from deskit._config import make_finder, resolve_metric, prep_fit_inputs
6
+ from deskit.utils import to_numpy
7
+ import numpy as np
8
+
9
+
10
+ _SIGNED_METRICS = {'mae', 'mse'}
11
+
12
+
13
+ def _signed_residual(y_true, y_pred):
14
+ return float(y_true) - float(y_pred)
15
+
16
+
17
+ class DEWSIV(KNNBase):
18
+ """
19
+ DEWS-IV: Distance-weighted Ensemble with Softmax — Inverse-distance + Variance-penalised.
20
+
21
+ Combines DEWS-I and DEWS-V. The mean score is inverse-distance weighted
22
+ (closer neighbours contribute more), and the variance penalty is also
23
+ computed with the same inverse-distance weights, so erratic behaviour
24
+ close to the test point is penalised more heavily than erratic behaviour
25
+ among distant neighbours.
26
+
27
+ Both scores and variance are normalised to [0, 1] within each neighbourhood
28
+ before the penalty is applied, making the adjustment dimensionless and
29
+ consistent regardless of metric scale or task type.
30
+
31
+ For MAE and MSE, variance is computed from signed residuals
32
+ (y_true - y_pred) rather than raw metric values, so that a model
33
+ oscillating between equal positive and negative errors is correctly
34
+ identified as inconsistent. The mean score used for routing still comes
35
+ from the standard metric (MAE/MSE), only the variance term uses signed
36
+ residuals.
37
+
38
+ For all other metrics, variance is computed directly from the score matrix.
39
+
40
+ Parameters
41
+ ----------
42
+ task : str
43
+ 'classification' or 'regression'.
44
+ metric : str or callable
45
+ Scoring function. 'mae' or 'mse' activate signed-residual variance;
46
+ all other metrics use the score matrix directly for variance.
47
+ mode : str
48
+ 'max' if higher scores are better, 'min' if lower.
49
+ k : int
50
+ Neighbourhood size. Default: 10.
51
+ threshold : float
52
+ Competence gate. After per-neighbourhood normalisation (best=1.0,
53
+ worst=0.0), models below this fraction are excluded from softmax.
54
+ 0.0 disables the gate; 1.0 reduces to OLA behaviour. Default: 0.5.
55
+ temperature : float, optional
56
+ Softmax sharpness. Lower = sharper routing toward the local best model.
57
+ Defaults to 0.1 for min-metrics, 1.0 otherwise.
58
+ preset : str
59
+ Neighbour search preset. Default: 'balanced'. See list_presets().
60
+ """
61
+
62
+ def __init__(self, task, metric='mae', mode='min', k=10,
63
+ threshold=0.5, temperature=None, preset='balanced', **kwargs):
64
+ metric_name, metric_fn = resolve_metric(metric)
65
+ finder = make_finder(preset, k, **kwargs)
66
+
67
+ self._use_signed = metric_name in _SIGNED_METRICS
68
+ self._metric_name = metric_name
69
+
70
+ super().__init__(metric=metric_fn, mode=mode, neighbor_finder=finder)
71
+
72
+ self.task = task
73
+ self.threshold = threshold
74
+ self._temperature = temperature
75
+ self._var_matrix = None # (n_val, n_models) signed residuals, MAE/MSE only
76
+
77
+ def fit(self, features, y, preds_dict):
78
+ """
79
+ Fit the routing model on validation data.
80
+
81
+ Parameters
82
+ ----------
83
+ features : array-like, shape (n_val, n_features)
84
+ Validation features. Must not overlap with train or test data.
85
+ y : array-like, shape (n_val,)
86
+ Validation ground-truth labels or values.
87
+ preds_dict : dict[str, array-like]
88
+ Validation predictions keyed by model name.
89
+ """
90
+ features, y, preds_dict = prep_fit_inputs(
91
+ features, y, preds_dict, self._metric_name
92
+ )
93
+ super().fit(features, y, preds_dict)
94
+
95
+ # Build signed residual matrix for variance computation (MAE/MSE only).
96
+ if self._use_signed:
97
+ n_val = len(y)
98
+ n_models = len(self.models)
99
+ self._var_matrix = np.zeros((n_val, n_models))
100
+ for j, name in enumerate(self.models):
101
+ preds = np.asarray(preds_dict[name])
102
+ self._var_matrix[:, j] = np.vectorize(_signed_residual)(y, preds)
103
+
104
+ def predict(self, x, temperature=None, threshold=None):
105
+ """
106
+ Return per-sample model weights.
107
+
108
+ Parameters
109
+ ----------
110
+ x : array-like, shape (n_features,) or (n_samples, n_features)
111
+ temperature : float, optional
112
+ Overrides the instance temperature for this call.
113
+ threshold : float, optional
114
+ Overrides the instance threshold for this call.
115
+
116
+ Returns
117
+ -------
118
+ dict or list of dict
119
+ Single sample: {model_name: weight}. Batch: list of such dicts.
120
+ """
121
+ t = temperature if temperature is not None else (
122
+ self._temperature if self._temperature is not None else
123
+ (0.1 if self.mode == 'min' else 1.0))
124
+ th = threshold if threshold is not None else self.threshold
125
+
126
+ x = np.atleast_2d(to_numpy(x))
127
+ batch_size = x.shape[0]
128
+
129
+ distances, indices = self.model.kneighbors(x) # both (batch, k)
130
+
131
+ # Inverse-distance weights — same as DEWS-I.
132
+ inv_dist = 1.0 / np.maximum(distances, 1e-8) # (batch, k)
133
+ inv_dist_w = inv_dist / inv_dist.sum(axis=1, keepdims=True) # normalised, (batch, k)
134
+
135
+ # Inverse-distance weighted mean of each model's scores over K neighbours.
136
+ neighbor_scores = self.matrix[indices] # (batch, k, n_models)
137
+ avg_scores = (neighbor_scores * inv_dist_w[:, :, np.newaxis]).sum(axis=1) # (batch, n_models)
138
+
139
+ # Select source matrix for variance computation.
140
+ if self._use_signed:
141
+ var_source = self._var_matrix[indices] # (batch, k, n_models)
142
+ else:
143
+ var_source = neighbor_scores
144
+
145
+ # Inverse-distance weighted variance: σ²_w = Σ w_i * (x_i - μ_w)²
146
+ # For signed residuals the mean is computed from var_source, not avg_scores,
147
+ # so that the variance is internally consistent with its own mean.
148
+ w = inv_dist_w[:, :, np.newaxis] # (batch, k, 1)
149
+ var_mean = (var_source * w).sum(axis=1) # (batch, n_models)
150
+ residuals = var_source - var_mean[:, np.newaxis, :] # (batch, k, n_models)
151
+ local_var = (w * residuals ** 2).sum(axis=1) # (batch, n_models)
152
+
153
+ # Normalize scores to [0, 1] before applying variance penalty so that
154
+ # the penalty is dimensionless and consistent across metrics and scales.
155
+ local_min = avg_scores.min(axis=1, keepdims=True)
156
+ local_max = avg_scores.max(axis=1, keepdims=True)
157
+ local_range = local_max - local_min
158
+ norm_scores = (avg_scores - local_min) / np.where(local_range > 0, local_range, 1.0)
159
+
160
+ # Normalize variance to [0, 1] across models within each sample so the
161
+ # penalty magnitude is also scale-independent.
162
+ var_min = local_var.min(axis=1, keepdims=True)
163
+ var_max = local_var.max(axis=1, keepdims=True)
164
+ var_range = var_max - var_min
165
+ norm_var = (local_var - var_min) / np.where(var_range > 0, var_range, 1.0)
166
+
167
+ # Penalise inconsistent models: divide normalised score by (1 + normalised variance).
168
+ norm_scores = norm_scores / (1.0 + norm_var)
169
+
170
+ # Re-normalise after penalty so the gate threshold remains meaningful.
171
+ local_min = norm_scores.min(axis=1, keepdims=True)
172
+ local_max = norm_scores.max(axis=1, keepdims=True)
173
+ local_range = local_max - local_min
174
+ norm_scores = (norm_scores - local_min) / np.where(local_range > 0, local_range, 1.0)
175
+
176
+ # Zero out models below threshold.
177
+ if th > 0:
178
+ gate = norm_scores >= th
179
+ any_pass = gate.any(axis=1, keepdims=True)
180
+ gate = np.where(any_pass, gate, norm_scores == 1.0)
181
+ norm_scores = norm_scores * gate
182
+
183
+ # Softmax.
184
+ max_scores = norm_scores.max(axis=1, keepdims=True)
185
+ exp_scores = np.exp((norm_scores - max_scores) / t)
186
+ if th > 0:
187
+ exp_scores = exp_scores * gate
188
+ total = exp_scores.sum(axis=1, keepdims=True)
189
+ weights = np.where(total > 0,
190
+ exp_scores / np.where(total > 0, total, 1.0),
191
+ np.full_like(exp_scores, 1.0 / len(self.models)))
192
+
193
+ if batch_size == 1:
194
+ return dict(zip(self.models, weights[0]))
195
+ return [dict(zip(self.models, w)) for w in weights]