deskit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deskit-0.1.0/LICENSE +21 -0
- deskit-0.1.0/PKG-INFO +289 -0
- deskit-0.1.0/README.md +258 -0
- deskit-0.1.0/pyproject.toml +37 -0
- deskit-0.1.0/setup.cfg +4 -0
- deskit-0.1.0/src/deskit/__init__.py +43 -0
- deskit-0.1.0/src/deskit/_config.py +186 -0
- deskit-0.1.0/src/deskit/analysis.py +377 -0
- deskit-0.1.0/src/deskit/base/__init__.py +4 -0
- deskit-0.1.0/src/deskit/base/base.py +11 -0
- deskit-0.1.0/src/deskit/base/knnbase.py +54 -0
- deskit-0.1.0/src/deskit/des/__init__.py +7 -0
- deskit-0.1.0/src/deskit/des/knndws.py +122 -0
- deskit-0.1.0/src/deskit/des/knorae.py +120 -0
- deskit-0.1.0/src/deskit/des/knoraiu.py +113 -0
- deskit-0.1.0/src/deskit/des/knorau.py +107 -0
- deskit-0.1.0/src/deskit/des/ola.py +85 -0
- deskit-0.1.0/src/deskit/metrics.py +75 -0
- deskit-0.1.0/src/deskit/neighbors.py +335 -0
- deskit-0.1.0/src/deskit/router.py +184 -0
- deskit-0.1.0/src/deskit/utils.py +27 -0
- deskit-0.1.0/src/deskit.egg-info/PKG-INFO +289 -0
- deskit-0.1.0/src/deskit.egg-info/SOURCES.txt +24 -0
- deskit-0.1.0/src/deskit.egg-info/dependency_links.txt +1 -0
- deskit-0.1.0/src/deskit.egg-info/requires.txt +19 -0
- deskit-0.1.0/src/deskit.egg-info/top_level.txt +1 -0
deskit-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 TikaaVo
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
deskit-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: deskit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Python library for Dynamic Ensemble Selection
|
|
5
|
+
Author: Tikhon Vodyanov
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/TikaaVo/deskit
|
|
8
|
+
Project-URL: Repository, https://github.com/TikaaVo/deskit.git
|
|
9
|
+
Keywords: ensemble,machine learning,dynamic ensemble selection,model routing
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
13
|
+
Requires-Python: >=3.8
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Requires-Dist: numpy>=1.21
|
|
17
|
+
Provides-Extra: exact
|
|
18
|
+
Requires-Dist: scikit-learn>=1.0; extra == "exact"
|
|
19
|
+
Provides-Extra: faiss
|
|
20
|
+
Requires-Dist: faiss-cpu>=1.7; extra == "faiss"
|
|
21
|
+
Provides-Extra: hnsw
|
|
22
|
+
Requires-Dist: hnswlib>=0.7; extra == "hnsw"
|
|
23
|
+
Provides-Extra: annoy
|
|
24
|
+
Requires-Dist: annoy>=1.17; extra == "annoy"
|
|
25
|
+
Provides-Extra: all
|
|
26
|
+
Requires-Dist: scikit-learn>=1.0; extra == "all"
|
|
27
|
+
Requires-Dist: faiss-cpu>=1.7; extra == "all"
|
|
28
|
+
Requires-Dist: hnswlib>=0.7; extra == "all"
|
|
29
|
+
Requires-Dist: annoy>=1.17; extra == "all"
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# deskit
|
|
33
|
+
|
|
34
|
+
[deskit](https://TikaaVo.github.io/deskit/) is a flexible, light, and easy-to-use ensembling library that implements
|
|
35
|
+
Dynamic Ensemble Selection (DES) algorithms for ensembling multiple ML models
|
|
36
|
+
on a singular dataset.
|
|
37
|
+
|
|
38
|
+
The library works entirely with data, taking as input a validation dataset
|
|
39
|
+
along with pre-computed predictions and outputting a dictionary of weights
|
|
40
|
+
per model. This means that it can be used with any library or model without
|
|
41
|
+
requiring any wrappers, including custom models, popular ML libraries, and APIs.
|
|
42
|
+
|
|
43
|
+
deskit contains multiple different DES algorithms, and it works with both classification
|
|
44
|
+
and regression.
|
|
45
|
+
|
|
46
|
+
# Dynamic Ensemble Selection
|
|
47
|
+
|
|
48
|
+
Ensemble learning in machine learning refers to when multiple models trained on a
|
|
49
|
+
single dataset combine their predictions to create a single, more accurate prediction,
|
|
50
|
+
usually through weighted voting or picking the best model.
|
|
51
|
+
|
|
52
|
+
DES refers to techniques where the models or their voting weights are selected dynamically
|
|
53
|
+
for every test case. This selection is based on the idea of competence regions, which is the
|
|
54
|
+
concept that there are regions of feature space where certain models perform particularly well,
|
|
55
|
+
so every base model can be an expert in a different region.
|
|
56
|
+
Only the most competent model, or an ensemble of the most competent models, is selected for the prediction.
|
|
57
|
+
|
|
58
|
+
Through empirical studies, DES has been shown to perform best with small-sized, imbalanced, or
|
|
59
|
+
heterogeneous datasets, as well as non-stationary data (concept drift), models that haven't perfected a dataset,
|
|
60
|
+
and when used on an ensemble of models with differing architectures and perspectives.
|
|
61
|
+
|
|
62
|
+
However, DES is not an automatic improvement. It tends to perform worse when datasets are homogeneous or have low diversity,
|
|
63
|
+
when the validation set isn't a good representation of the test set, when using very high dimensional data or few training samples,
|
|
64
|
+
or when a single model dominates a dataset.
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
## Installation
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
pip install deskit
|
|
72
|
+
|
|
73
|
+
# The library runs with Nearest Neighbors from sklearn for exact KNN
|
|
74
|
+
pip install scikit-learn
|
|
75
|
+
|
|
76
|
+
# Alternatively, ANN can be used for faster runtimes at the cost of
|
|
77
|
+
# slightly lower accuracy. The following three are supported;
|
|
78
|
+
# Install the one you want to use.
|
|
79
|
+
pip install faiss-cpu # FAISS (good default for most datasets)
|
|
80
|
+
pip install annoy # Annoy (memory-efficient, simple)
|
|
81
|
+
pip install hnswlib # HNSW (best for high-dimensional data)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Dependencies
|
|
87
|
+
|
|
88
|
+
Python (>= 3.9)
|
|
89
|
+
|
|
90
|
+
NumPy (>= 1.21)
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Quick start
|
|
95
|
+
|
|
96
|
+
Full explanation of the algorithms, syntax, and parameters is available in the [documentation](https://TikaaVo.github.io/deskit/).
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from deskit.des.knorau import KNORAU
|
|
100
|
+
|
|
101
|
+
# 1. Train your models
|
|
102
|
+
models = {"rf": rf, "xgb": xgb, "mlp": mlp}
|
|
103
|
+
|
|
104
|
+
# 2. Get predictions on a held-out validation set
|
|
105
|
+
# Regression: scalar arrays
|
|
106
|
+
# Classification: probability arrays OR hard predictions
|
|
107
|
+
val_preds = {name: m.predict_proba(X_val) for name, m in models.items()}
|
|
108
|
+
|
|
109
|
+
# 3. Fit the router
|
|
110
|
+
router = KNORAU(task="classification", metric="accuracy", mode="max", k=20)
|
|
111
|
+
router.fit(X_val, y_val, val_preds)
|
|
112
|
+
|
|
113
|
+
# 4. Route test samples
|
|
114
|
+
test_preds = {name: m.predict_proba(X_test) for name, m in models.items()}
|
|
115
|
+
|
|
116
|
+
for i, x in enumerate(X_test):
|
|
117
|
+
weights = router.predict(x, temperature=0.1)
|
|
118
|
+
# weights example: {"rf": 0.7, "xgb": 0.2, "mlp": 0.1}
|
|
119
|
+
prediction = sum(weights[n] * test_preds[n][i] for n in weights)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
For classification with probability arrays, blend the output the same way to
|
|
123
|
+
get a final probability distribution, then take the argmax.
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## Why deskit?
|
|
128
|
+
|
|
129
|
+
Most DES libraries are tied to scikit-learn. deskit only ever sees a numpy
|
|
130
|
+
feature matrix and a dict of prediction arrays, so the models themselves are
|
|
131
|
+
never touched after training. This allows for more flexibility and a lighter library.
|
|
132
|
+
|
|
133
|
+
Furthermore, deskit works with both classification and regression, while the majority of DES
|
|
134
|
+
libraries and literature are focused only on classification tasks.
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
# PyTorch example
|
|
138
|
+
with torch.no_grad():
|
|
139
|
+
val_preds = {name: m(X_val_t).cpu().numpy() for name, m in models.items()}
|
|
140
|
+
test_preds = {name: m(X_test_t).cpu().numpy() for name, m in models.items()}
|
|
141
|
+
|
|
142
|
+
router = KNORAU(task="classification", metric="accuracy", mode="max", k=20)
|
|
143
|
+
router.fit(X_val, y_val, val_preds)
|
|
144
|
+
weights = router.predict(X_test[i])
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## Algorithms
|
|
150
|
+
|
|
151
|
+
| Method | Best for | Notes |
|
|
152
|
+
|---|---|---|
|
|
153
|
+
| `KNNDWS` | Regression | Softmax over neighbourhood-averaged scores. Temperature controls sharpness. |
|
|
154
|
+
| `KNORAU` | Classification | Vote-count weighting. Each model earns one vote per neighbour it correctly classifies. |
|
|
155
|
+
| `KNORAE` | Classification | Intersection-based. Only models correct on all neighbours survive; falls back to smaller neighbourhoods. |
|
|
156
|
+
| `KNORAIU` | Classification | Like KNORA-U but votes are inverse-distance weighted. |
|
|
157
|
+
| `OLA` | Both | Hard selection: only the single best model in the neighbourhood contributes. |
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## ANN backends
|
|
162
|
+
|
|
163
|
+
deskit supports three Approximate Nearest Neighbour backends plus exact search:
|
|
164
|
+
|
|
165
|
+
| Preset | Backend | Install | Notes |
|
|
166
|
+
|---|---|---|---|
|
|
167
|
+
| `exact` | sklearn KNN | `scikit-learn` | Exact, no extra deps |
|
|
168
|
+
| `balanced` | FAISS IVF | `faiss-cpu` | ~98% recall, good default |
|
|
169
|
+
| `fast` | FAISS IVF | `faiss-cpu` | ~95% recall, faster queries |
|
|
170
|
+
| `turbo` | FAISS flat | `faiss-cpu` | Exact via FAISS, GPU-friendly |
|
|
171
|
+
| `high_dim_balanced` | HNSW | `hnswlib` | Best for >100 features, balanced |
|
|
172
|
+
| `high_dim_fast` | HNSW | `hnswlib` | Best for >100 features, faster |
|
|
173
|
+
|
|
174
|
+
Annoy is also available as a custom backend — memory-efficient and simple,
|
|
175
|
+
good for datasets that need to be persisted to disk.
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
# Exact search (no extra deps)
|
|
179
|
+
router = KNORAU(..., preset="exact")
|
|
180
|
+
|
|
181
|
+
# High-dimensional data
|
|
182
|
+
router = KNORAU(..., preset="high_dim_balanced")
|
|
183
|
+
|
|
184
|
+
# Custom FAISS config
|
|
185
|
+
router = KNORAU(..., preset="custom", finder="faiss",
|
|
186
|
+
index_type="ivf", n_probes=50)
|
|
187
|
+
|
|
188
|
+
# Annoy
|
|
189
|
+
router = KNORAU(..., preset="custom", finder="annoy",
|
|
190
|
+
n_trees=100, search_k=-1)
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## Custom metrics
|
|
196
|
+
|
|
197
|
+
Any callable `(y_true, y_pred) -> float` works:
|
|
198
|
+
|
|
199
|
+
```python
|
|
200
|
+
def pinball(y_true, y_pred, alpha=0.9):
|
|
201
|
+
e = y_true - y_pred
|
|
202
|
+
return alpha * e if e >= 0 else (alpha - 1) * e
|
|
203
|
+
|
|
204
|
+
router = KNNDWS(task="regression", metric=pinball, mode="min", k=20)
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
Built-in metric strings: `accuracy`, `mae`, `mse`, `rmse`, `log_loss`, `prob_correct`.
|
|
208
|
+
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
## Benchmark results
|
|
212
|
+
|
|
213
|
+
20-seed benchmark (seeds 0–19) on standard sklearn and OpenML datasets. "Best Single" is the best
|
|
214
|
+
individual model selected on the validation set. "Simple Average" is uniform
|
|
215
|
+
equal-weight blending, included as a baseline.
|
|
216
|
+
|
|
217
|
+
It is important to consider that these experiments were run with the default hyperparameters, meaning that
|
|
218
|
+
they could vary greatly with different values, and results could improve with tuning.
|
|
219
|
+
For a more detailed benchmark breakdown, see the [documentation](https://TikaaVo.github.io/deskit/).
|
|
220
|
+
To see the full results, see `results.txt` in the `tests` folder.
|
|
221
|
+
|
|
222
|
+
Pool: KNN, Decision Tree, SVR, Ridge, Bayesian Ridge.
|
|
223
|
+
|
|
224
|
+
This pool was selected for having variability in architectures while avoiding a single dominant model.
|
|
225
|
+
|
|
226
|
+
deskit algorithms tested: OLA, KNN-DWS, KNORA-U, KNORA-E, KNORA-IU.
|
|
227
|
+
|
|
228
|
+
### Regression (MAE, lower is better)
|
|
229
|
+
|
|
230
|
+
% shown as delta vs Best Single. 10-seed mean.
|
|
231
|
+
|
|
232
|
+
| Dataset | Best Single | Simple Avg | deskit best |
|
|
233
|
+
|------------------------------|-----------|---|-----------------------|
|
|
234
|
+
| California Housing (sklearn) | 0.3956 | +7.99% | **-2.24%** (KNN-DWS) |
|
|
235
|
+
| Bike Sharing (OpenML) | 51.6779 | +47.77% | **-5.34%** (KNN-DWS) |
|
|
236
|
+
| Abalone (OpenML) | **1.4981** | +1.14% | +1.47% (KNORA-U) |
|
|
237
|
+
| Diabetes (sklearn) | **44.5042** | +3.18% | +1.17% (KNN-DWS) |
|
|
238
|
+
| Concrete Strength (OpenML)    | 5.2686 | +23.66% | **-1.05%** (KNORA-IU) |
|
|
239
|
+
|
|
240
|
+
deskit beats best single and simple averaging on 3/5 regression datasets. This shows how DES can provide a
|
|
241
|
+
strong boost if used on the right dataset, but it might be counterproductive if used blindly.
|
|
242
|
+
|
|
243
|
+
KNORA variants are designed for classification, which explains the poor performance
|
|
244
|
+
on regression datasets; however, some exceptions can occur in certain datasets, either where
|
|
245
|
+
feature space has hard clusters (like in Concrete Strength) or when the target is discrete
|
|
246
|
+
and classification-like (like in Abalone).
|
|
247
|
+
|
|
248
|
+
### Classification (Accuracy, higher is better)
|
|
249
|
+
|
|
250
|
+
% shown as delta vs Best Single. 10-seed mean.
|
|
251
|
+
|
|
252
|
+
| Dataset | Best Single | Simple Avg | deskit best |
|
|
253
|
+
|------------------------|-------------|--------|-----------------------|
|
|
254
|
+
| HAR (OpenML) | 98.24% | -0.33% | **+0.14%** (KNN-DWS) |
|
|
255
|
+
| Yeast (OpenML) | 58.87% | +0.77% | **+1.66%** (KNORA-IU) |
|
|
256
|
+
| Image Segment (OpenML) | 93.70% | +1.40% | **+2.09%** (KNORA-IU) |
|
|
257
|
+
| Waveform (OpenML) | 89.95% | -2.05% | **+0.93%** (KNORA-E) |
|
|
258
|
+
| Vowel (OpenML) | **85.91%** | -0.98% | -0.40% (KNN-DWS) |
|
|
259
|
+
|
|
260
|
+
deskit beats or matches best single and simple averaging on 4/5 classification datasets. As seen on regression, DES
|
|
261
|
+
can improve or hurt performance, so it must be used wisely, but if used correctly it can show promising results.
|
|
262
|
+
|
|
263
|
+
### Speed (mean ms fit + predict, 20 seeds, all tested algorithms combined)
|
|
264
|
+
|
|
265
|
+
Note that it is usually recommended to use only one algorithm at a time; this benchmark ran five of them at the
|
|
266
|
+
same time, so with a single one runtime is expected to be about 5x faster. For this benchmark, `preset='balanced'` was used,
|
|
267
|
+
so the backend was an ANN algorithm with FAISS IVF.
|
|
268
|
+
|
|
269
|
+
| Dataset | deskit |
|
|
270
|
+
|--------------------|----------|
|
|
271
|
+
| California Housing | 136.6 ms |
|
|
272
|
+
| Bike Sharing | 115.5 ms |
|
|
273
|
+
| Abalone | 28.5 ms |
|
|
274
|
+
| Diabetes | 8.1 ms |
|
|
275
|
+
| Concrete Strength  | 9.4 ms |
|
|
276
|
+
| HAR | 297.5 ms |
|
|
277
|
+
| Yeast | 16.3 ms |
|
|
278
|
+
| Image Segment | 27.2 ms |
|
|
279
|
+
| Waveform | 48.9 ms |
|
|
280
|
+
| Vowel | 16.5 ms |
|
|
281
|
+
|
|
282
|
+
deskit caches all model predictions on the validation set at fit time and reads
|
|
283
|
+
from that matrix at inference.
|
|
284
|
+
|
|
285
|
+
---
|
|
286
|
+
|
|
287
|
+
## Contributing
|
|
288
|
+
|
|
289
|
+
Issues and PRs welcome.
|
deskit-0.1.0/README.md
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
# deskit
|
|
2
|
+
|
|
3
|
+
[deskit](https://TikaaVo.github.io/deskit/) is a flexible, light, and easy-to-use ensembling library that implements
|
|
4
|
+
Dynamic Ensemble Selection (DES) algorithms for ensembling multiple ML models
|
|
5
|
+
on a singular dataset.
|
|
6
|
+
|
|
7
|
+
The library works entirely with data, taking as input a validation dataset
|
|
8
|
+
along with pre-computed predictions and outputting a dictionary of weights
|
|
9
|
+
per model. This means that it can be used with any library or model without
|
|
10
|
+
requiring any wrappers, including custom models, popular ML libraries, and APIs.
|
|
11
|
+
|
|
12
|
+
deskit contains multiple different DES algorithms, and it works with both classification
|
|
13
|
+
and regression.
|
|
14
|
+
|
|
15
|
+
# Dynamic Ensemble Selection
|
|
16
|
+
|
|
17
|
+
Ensemble learning in machine learning refers to when multiple models trained on a
|
|
18
|
+
single dataset combine their predictions to create a single, more accurate prediction,
|
|
19
|
+
usually through weighted voting or picking the best model.
|
|
20
|
+
|
|
21
|
+
DES refers to techniques where the models or their voting weights are selected dynamically
|
|
22
|
+
for every test case. This selection is based on the idea of competence regions, which is the
|
|
23
|
+
concept that there are regions of feature space where certain models perform particularly well,
|
|
24
|
+
so every base model can be an expert in a different region.
|
|
25
|
+
Only the most competent model, or an ensemble of the most competent models, is selected for the prediction.
|
|
26
|
+
|
|
27
|
+
Through empirical studies, DES has been shown to perform best with small-sized, imbalanced, or
|
|
28
|
+
heterogeneous datasets, as well as non-stationary data (concept drift), models that haven't perfected a dataset,
|
|
29
|
+
and when used on an ensemble of models with differing architectures and perspectives.
|
|
30
|
+
|
|
31
|
+
However, DES is not an automatic improvement. It tends to perform worse when datasets are homogeneous or have low diversity,
|
|
32
|
+
when the validation set isn't a good representation of the test set, when using very high dimensional data or few training samples,
|
|
33
|
+
or when a single model dominates a dataset.
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## Installation
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install deskit
|
|
41
|
+
|
|
42
|
+
# The library runs with Nearest Neighbors from sklearn for exact KNN
|
|
43
|
+
pip install scikit-learn
|
|
44
|
+
|
|
45
|
+
# Alternatively, ANN can be used for faster runtimes at the cost of
|
|
46
|
+
# slightly lower accuracy. The following three are supported;
|
|
47
|
+
# Install the one you want to use.
|
|
48
|
+
pip install faiss-cpu # FAISS (good default for most datasets)
|
|
49
|
+
pip install annoy # Annoy (memory-efficient, simple)
|
|
50
|
+
pip install hnswlib # HNSW (best for high-dimensional data)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Dependencies
|
|
56
|
+
|
|
57
|
+
Python (>= 3.9)
|
|
58
|
+
|
|
59
|
+
NumPy (>= 1.21)
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Quick start
|
|
64
|
+
|
|
65
|
+
Full explanation of the algorithms, syntax, and parameters is available in the [documentation](https://TikaaVo.github.io/deskit/).
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
from deskit.des.knorau import KNORAU
|
|
69
|
+
|
|
70
|
+
# 1. Train your models
|
|
71
|
+
models = {"rf": rf, "xgb": xgb, "mlp": mlp}
|
|
72
|
+
|
|
73
|
+
# 2. Get predictions on a held-out validation set
|
|
74
|
+
# Regression: scalar arrays
|
|
75
|
+
# Classification: probability arrays OR hard predictions
|
|
76
|
+
val_preds = {name: m.predict_proba(X_val) for name, m in models.items()}
|
|
77
|
+
|
|
78
|
+
# 3. Fit the router
|
|
79
|
+
router = KNORAU(task="classification", metric="accuracy", mode="max", k=20)
|
|
80
|
+
router.fit(X_val, y_val, val_preds)
|
|
81
|
+
|
|
82
|
+
# 4. Route test samples
|
|
83
|
+
test_preds = {name: m.predict_proba(X_test) for name, m in models.items()}
|
|
84
|
+
|
|
85
|
+
for i, x in enumerate(X_test):
|
|
86
|
+
weights = router.predict(x, temperature=0.1)
|
|
87
|
+
# weights example: {"rf": 0.7, "xgb": 0.2, "mlp": 0.1}
|
|
88
|
+
prediction = sum(weights[n] * test_preds[n][i] for n in weights)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
For classification with probability arrays, blend the output the same way to
|
|
92
|
+
get a final probability distribution, then take the argmax.
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## Why deskit?
|
|
97
|
+
|
|
98
|
+
Most DES libraries are tied to scikit-learn. deskit only ever sees a numpy
|
|
99
|
+
feature matrix and a dict of prediction arrays, so the models themselves are
|
|
100
|
+
never touched after training. This allows for more flexibility and a lighter library.
|
|
101
|
+
|
|
102
|
+
Furthermore, deskit works with both classification and regression, while the majority of DES
|
|
103
|
+
libraries and literature are focused only on classification tasks.
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
# PyTorch example
|
|
107
|
+
with torch.no_grad():
|
|
108
|
+
val_preds = {name: m(X_val_t).cpu().numpy() for name, m in models.items()}
|
|
109
|
+
test_preds = {name: m(X_test_t).cpu().numpy() for name, m in models.items()}
|
|
110
|
+
|
|
111
|
+
router = KNORAU(task="classification", metric="accuracy", mode="max", k=20)
|
|
112
|
+
router.fit(X_val, y_val, val_preds)
|
|
113
|
+
weights = router.predict(X_test[i])
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## Algorithms
|
|
119
|
+
|
|
120
|
+
| Method | Best for | Notes |
|
|
121
|
+
|---|---|---|
|
|
122
|
+
| `KNNDWS` | Regression | Softmax over neighbourhood-averaged scores. Temperature controls sharpness. |
|
|
123
|
+
| `KNORAU` | Classification | Vote-count weighting. Each model earns one vote per neighbour it correctly classifies. |
|
|
124
|
+
| `KNORAE` | Classification | Intersection-based. Only models correct on all neighbours survive; falls back to smaller neighbourhoods. |
|
|
125
|
+
| `KNORAIU` | Classification | Like KNORA-U but votes are inverse-distance weighted. |
|
|
126
|
+
| `OLA` | Both | Hard selection: only the single best model in the neighbourhood contributes. |
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## ANN backends
|
|
131
|
+
|
|
132
|
+
deskit supports three Approximate Nearest Neighbour backends plus exact search:
|
|
133
|
+
|
|
134
|
+
| Preset | Backend | Install | Notes |
|
|
135
|
+
|---|---|---|---|
|
|
136
|
+
| `exact` | sklearn KNN | `scikit-learn` | Exact, no extra deps |
|
|
137
|
+
| `balanced` | FAISS IVF | `faiss-cpu` | ~98% recall, good default |
|
|
138
|
+
| `fast` | FAISS IVF | `faiss-cpu` | ~95% recall, faster queries |
|
|
139
|
+
| `turbo` | FAISS flat | `faiss-cpu` | Exact via FAISS, GPU-friendly |
|
|
140
|
+
| `high_dim_balanced` | HNSW | `hnswlib` | Best for >100 features, balanced |
|
|
141
|
+
| `high_dim_fast` | HNSW | `hnswlib` | Best for >100 features, faster |
|
|
142
|
+
|
|
143
|
+
Annoy is also available as a custom backend — memory-efficient and simple,
|
|
144
|
+
good for datasets that need to be persisted to disk.
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
# Exact search (no extra deps)
|
|
148
|
+
router = KNORAU(..., preset="exact")
|
|
149
|
+
|
|
150
|
+
# High-dimensional data
|
|
151
|
+
router = KNORAU(..., preset="high_dim_balanced")
|
|
152
|
+
|
|
153
|
+
# Custom FAISS config
|
|
154
|
+
router = KNORAU(..., preset="custom", finder="faiss",
|
|
155
|
+
index_type="ivf", n_probes=50)
|
|
156
|
+
|
|
157
|
+
# Annoy
|
|
158
|
+
router = KNORAU(..., preset="custom", finder="annoy",
|
|
159
|
+
n_trees=100, search_k=-1)
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## Custom metrics
|
|
165
|
+
|
|
166
|
+
Any callable `(y_true, y_pred) -> float` works:
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
def pinball(y_true, y_pred, alpha=0.9):
|
|
170
|
+
e = y_true - y_pred
|
|
171
|
+
return alpha * e if e >= 0 else (alpha - 1) * e
|
|
172
|
+
|
|
173
|
+
router = KNNDWS(task="regression", metric=pinball, mode="min", k=20)
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
Built-in metric strings: `accuracy`, `mae`, `mse`, `rmse`, `log_loss`, `prob_correct`.
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
## Benchmark results
|
|
181
|
+
|
|
182
|
+
20-seed benchmark (seeds 0–19) on standard sklearn and OpenML datasets. "Best Single" is the best
|
|
183
|
+
individual model selected on the validation set. "Simple Average" is uniform
|
|
184
|
+
equal-weight blending, included as a baseline.
|
|
185
|
+
|
|
186
|
+
It is important to consider that these experiments were run with the default hyperparameters, meaning that
|
|
187
|
+
they could vary greatly with different values, and results could improve with tuning.
|
|
188
|
+
For a more detailed benchmark breakdown, see the [documentation](https://TikaaVo.github.io/deskit/).
|
|
189
|
+
To see the full results, see `results.txt` in the `tests` folder.
|
|
190
|
+
|
|
191
|
+
Pool: KNN, Decision Tree, SVR, Ridge, Bayesian Ridge.
|
|
192
|
+
|
|
193
|
+
This pool was selected for having variability in architectures while avoiding a single dominant model.
|
|
194
|
+
|
|
195
|
+
deskit algorithms tested: OLA, KNN-DWS, KNORA-U, KNORA-E, KNORA-IU.
|
|
196
|
+
|
|
197
|
+
### Regression (MAE, lower is better)
|
|
198
|
+
|
|
199
|
+
% shown as delta vs Best Single. 10-seed mean.
|
|
200
|
+
|
|
201
|
+
| Dataset | Best Single | Simple Avg | deskit best |
|
|
202
|
+
|------------------------------|-----------|---|-----------------------|
|
|
203
|
+
| California Housing (sklearn) | 0.3956 | +7.99% | **-2.24%** (KNN-DWS) |
|
|
204
|
+
| Bike Sharing (OpenML) | 51.6779 | +47.77% | **-5.34%** (KNN-DWS) |
|
|
205
|
+
| Abalone (OpenML) | **1.4981** | +1.14% | +1.47% (KNORA-U) |
|
|
206
|
+
| Diabetes (sklearn) | **44.5042** | +3.18% | +1.17% (KNN-DWS) |
|
|
207
|
+
| Concrete Strength (OpenML)    | 5.2686 | +23.66% | **-1.05%** (KNORA-IU) |
|
|
208
|
+
|
|
209
|
+
deskit beats best single and simple averaging on 3/5 regression datasets. This shows how DES can provide a
|
|
210
|
+
strong boost if used on the right dataset, but it might be counterproductive if used blindly.
|
|
211
|
+
|
|
212
|
+
KNORA variants are designed for classification, which explains the poor performance
|
|
213
|
+
on regression datasets; however, some exceptions can occur in certain datasets, either where
|
|
214
|
+
feature space has hard clusters (like in Concrete Strength) or when the target is discrete
|
|
215
|
+
and classification-like (like in Abalone).
|
|
216
|
+
|
|
217
|
+
### Classification (Accuracy, higher is better)
|
|
218
|
+
|
|
219
|
+
% shown as delta vs Best Single. 10-seed mean.
|
|
220
|
+
|
|
221
|
+
| Dataset | Best Single | Simple Avg | deskit best |
|
|
222
|
+
|------------------------|-------------|--------|-----------------------|
|
|
223
|
+
| HAR (OpenML) | 98.24% | -0.33% | **+0.14%** (KNN-DWS) |
|
|
224
|
+
| Yeast (OpenML) | 58.87% | +0.77% | **+1.66%** (KNORA-IU) |
|
|
225
|
+
| Image Segment (OpenML) | 93.70% | +1.40% | **+2.09%** (KNORA-IU) |
|
|
226
|
+
| Waveform (OpenML) | 89.95% | -2.05% | **+0.93%** (KNORA-E) |
|
|
227
|
+
| Vowel (OpenML) | **85.91%** | -0.98% | -0.40% (KNN-DWS) |
|
|
228
|
+
|
|
229
|
+
deskit beats or matches best single and simple averaging on 4/5 classification datasets. As seen on regression, DES
|
|
230
|
+
can improve or hurt performance, so it must be used wisely, but if used correctly it can show promising results.
|
|
231
|
+
|
|
232
|
+
### Speed (mean ms fit + predict, 20 seeds, all tested algorithms combined)
|
|
233
|
+
|
|
234
|
+
Note that it is usually recommended to use only one algorithm at a time; this benchmark ran five of them at the
|
|
235
|
+
same time, so with a single one runtime is expected to be about 5x faster. For this benchmark, `preset='balanced'` was used,
|
|
236
|
+
so the backend was an ANN algorithm with FAISS IVF.
|
|
237
|
+
|
|
238
|
+
| Dataset | deskit |
|
|
239
|
+
|--------------------|----------|
|
|
240
|
+
| California Housing | 136.6 ms |
|
|
241
|
+
| Bike Sharing | 115.5 ms |
|
|
242
|
+
| Abalone | 28.5 ms |
|
|
243
|
+
| Diabetes | 8.1 ms |
|
|
244
|
+
| Concrete Strength  | 9.4 ms |
|
|
245
|
+
| HAR | 297.5 ms |
|
|
246
|
+
| Yeast | 16.3 ms |
|
|
247
|
+
| Image Segment | 27.2 ms |
|
|
248
|
+
| Waveform | 48.9 ms |
|
|
249
|
+
| Vowel | 16.5 ms |
|
|
250
|
+
|
|
251
|
+
deskit caches all model predictions on the validation set at fit time and reads
|
|
252
|
+
from that matrix at inference.
|
|
253
|
+
|
|
254
|
+
---
|
|
255
|
+
|
|
256
|
+
## Contributing
|
|
257
|
+
|
|
258
|
+
Issues and PRs welcome.
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "deskit"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A Python library for Dynamic Ensemble Selection"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.8"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Tikhon Vodyanov" }
|
|
14
|
+
]
|
|
15
|
+
keywords = ["ensemble", "machine learning", "dynamic ensemble selection", "model routing"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
20
|
+
]
|
|
21
|
+
dependencies = [
|
|
22
|
+
"numpy>=1.21",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[tool.setuptools.packages.find]
|
|
26
|
+
where = ["src"]
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
exact = ["scikit-learn>=1.0"]
|
|
30
|
+
faiss = ["faiss-cpu>=1.7"]
|
|
31
|
+
hnsw = ["hnswlib>=0.7"]
|
|
32
|
+
annoy = ["annoy>=1.17"]
|
|
33
|
+
all = ["scikit-learn>=1.0", "faiss-cpu>=1.7", "hnswlib>=0.7", "annoy>=1.17"]
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Homepage = "https://github.com/TikaaVo/deskit"
|
|
37
|
+
Repository = "https://github.com/TikaaVo/deskit.git"
|
deskit-0.1.0/setup.cfg
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""
|
|
2
|
+
deskit — Dynamic Ensemble Selection library.
|
|
3
|
+
|
|
4
|
+
Metrics
|
|
5
|
+
-------
|
|
6
|
+
Pass a metric name string:
|
|
7
|
+
|
|
8
|
+
KNNDWS(task='classification', metric='log_loss', mode='min')
|
|
9
|
+
|
|
10
|
+
Or import a metric function directly:
|
|
11
|
+
|
|
12
|
+
from deskit.metrics import log_loss, mae
|
|
13
|
+
|
|
14
|
+
KNNDWS(task='classification', metric=log_loss, mode='min')
|
|
15
|
+
|
|
16
|
+
Available built-in metrics:
|
|
17
|
+
Scalar predictions (pass predict() output):
|
|
18
|
+
'mae', 'mse', 'rmse', 'accuracy'
|
|
19
|
+
|
|
20
|
+
Probability predictions (pass predict_proba() output):
|
|
21
|
+
'log_loss', 'prob_correct'
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from deskit.des.knndws import KNNDWS
|
|
25
|
+
from deskit.des.ola import OLA
|
|
26
|
+
from deskit.des.knorau import KNORAU
|
|
27
|
+
from deskit.des.knorae import KNORAE
|
|
28
|
+
from deskit.des.knoraiu import KNORAIU
|
|
29
|
+
from deskit.router import DynamicRouter
|
|
30
|
+
from deskit._config import SPEED_PRESETS, list_presets
|
|
31
|
+
from deskit.analysis import analyze
|
|
32
|
+
|
|
33
|
+
__all__ = [
|
|
34
|
+
'KNNDWS',
|
|
35
|
+
'OLA',
|
|
36
|
+
'KNORAU',
|
|
37
|
+
'KNORAE',
|
|
38
|
+
'KNORAIU',
|
|
39
|
+
'DynamicRouter',
|
|
40
|
+
'SPEED_PRESETS',
|
|
41
|
+
'list_presets',
|
|
42
|
+
'analyze',
|
|
43
|
+
]
|