kernelboost 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kernelboost/__init__.py +11 -0
- kernelboost/backend.py +202 -0
- kernelboost/booster.py +798 -0
- kernelboost/cpu_functions.py +259 -0
- kernelboost/estimator.py +258 -0
- kernelboost/feature_selection.py +305 -0
- kernelboost/gpu_functions.py +164 -0
- kernelboost/kernels.c +251 -0
- kernelboost/kernels.cu +84 -0
- kernelboost/libkernels.dll +0 -0
- kernelboost/libkernels.so +0 -0
- kernelboost/multiclassbooster.py +516 -0
- kernelboost/objectives.py +336 -0
- kernelboost/optimizer.py +161 -0
- kernelboost/rho_optimizer.py +530 -0
- kernelboost/tree.py +485 -0
- kernelboost/utilities.py +459 -0
- kernelboost-0.1.0.dist-info/LICENSE +21 -0
- kernelboost-0.1.0.dist-info/METADATA +279 -0
- kernelboost-0.1.0.dist-info/RECORD +22 -0
- kernelboost-0.1.0.dist-info/WHEEL +5 -0
- kernelboost-0.1.0.dist-info/top_level.txt +1 -0
kernelboost/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""KernelBooster: Gradient boosting with Nadaraya-Watson (local constant) estimator as base learners."""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.1.0"
|
|
4
|
+
|
|
5
|
+
from .booster import KernelBooster
|
|
6
|
+
from .multiclassbooster import MulticlassBooster
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"KernelBooster",
|
|
10
|
+
"MulticlassBooster",
|
|
11
|
+
]
|
kernelboost/backend.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
class Backend:
    """Unified interface for GPU/CPU kernel operations.

    Dispatches Nadaraya-Watson kernel computations to either the CUDA
    implementation in ``gpu_functions`` or the native implementation in
    ``cpu_functions``, falling back to CPU automatically when no GPU is
    available.  On GPU, a conservative memory budget is computed up front
    and every operation is checked against it before launching.
    """

    def __init__(self, use_gpu: bool = False, kernel_type: str = 'gaussian'):
        """
        Initialize backend.

        Args:
            use_gpu: If True, use GPU.
            kernel_type: Kernel type; one of ``"gaussian"`` or ``"laplace"``.

        Raises:
            ValueError: If ``kernel_type`` is not a supported kernel.
        """
        self.use_gpu = use_gpu
        self.max_matrix_elements = None  # CPU has no limit
        self.max_symmetric_n = None  # only set once a GPU memory budget exists

        self.kernel_type = kernel_type
        kernel_types = {"gaussian": 0, "laplace": 1}
        if kernel_type not in kernel_types:
            # Fail early with a clear message instead of an opaque KeyError.
            raise ValueError(
                f"Unknown kernel_type {kernel_type!r}; "
                f"expected one of {sorted(kernel_types)}"
            )
        self._kernel_type_int = kernel_types[kernel_type]

        if use_gpu:
            if not self._check_gpu():
                print("Warning: GPU unavailable, falling back to CPU")
                self.use_gpu = False
            else:
                self._compute_memory_limit()

    def _check_gpu(self) -> bool:
        """Return True if the GPU helper module imports and reports a usable device."""
        try:
            from .gpu_functions import is_gpu_available
            return is_gpu_available()
        except ImportError:
            # cupy (or the helper module) is not installed; treat as no GPU.
            return False

    def _compute_memory_limit(self):
        """Calculate max kernel matrix size that fits in GPU memory."""
        import cupy as cp
        free_bytes, _ = cp.cuda.Device().mem_info
        # float32 = 4 bytes, use 50% of free memory (conservative estimate)
        self.max_matrix_elements = int(free_bytes * 0.5 / 4)
        # Largest n for which an (n, n) kernel matrix fits in the budget.
        self.max_symmetric_n = int(self.max_matrix_elements ** 0.5)

    def _check_memory(self, n_rows: int, n_cols: int, operation: str):
        """Raise MemoryError if an (n_rows, n_cols) matrix won't fit in GPU memory."""
        if self.max_matrix_elements is None:
            return  # CPU has no limit
        required = n_rows * n_cols
        if required > self.max_matrix_elements:
            # Trailing spaces inside each fragment keep the concatenated
            # message readable (adjacent f-strings are joined with no gap).
            raise MemoryError(
                f"GPU memory insufficient for {operation}. "
                f"Matrix ({n_rows}, {n_cols}) = {required/1e6:.0f}M elements, "
                f"limit ~{self.max_matrix_elements/1e6:.0f}M. "
                f"Max symmetric size: {self.max_symmetric_n}x{self.max_symmetric_n}. "
                f"Use use_gpu=False or reduce data size."
            )

    @property
    def name(self) -> str:
        """Short identifier of the active backend: ``"gpu"`` or ``"cpu"``."""
        return "gpu" if self.use_gpu else "cpu"

    def predict(self, training_dependent: np.ndarray,
                training_features: np.ndarray,
                prediction_features: np.ndarray,
                precision: np.ndarray) -> np.ndarray:
        """
        Predict using Nadaraya-Watson regression with given precision.

        Args:
            training_dependent: (n_train,) or (n_train, 1)
            training_features: (n_train, n_features)
            prediction_features: (n_pred, n_features)
            precision: precision array

        Returns:
            Predictions for each row of ``prediction_features``.
        """
        self._check_memory(
            prediction_features.shape[0], training_features.shape[0], "predict"
        )
        if self.use_gpu:
            from .gpu_functions import cuda_predict
            return cuda_predict(training_dependent, training_features,
                                prediction_features, precision,
                                self._kernel_type_int)
        else:
            from .cpu_functions import cpu_predict
            return cpu_predict(training_dependent, training_features,
                               prediction_features, precision,
                               self._kernel_type_int)

    def loo_cv(self, training_dependent: np.ndarray,
               training_features: np.ndarray,
               precision: np.ndarray,
               mean_y: float = 0.0) -> float:
        """
        Leave-one-out cross-validation error with given precision.

        Args:
            training_dependent: (n_train,) or (n_train, 1)
            training_features: (n_train, n_features)
            precision: precision array
            mean_y: mean of training_dependent for zero-weight fallback

        Returns:
            LOO-CV error as a plain Python float on both backends.
        """
        self._check_memory(
            training_features.shape[0], training_features.shape[0], "LOO-CV"
        )
        if self.use_gpu:
            from .gpu_functions import cuda_loo
            return float(cuda_loo(training_dependent, training_features, precision,
                                  self._kernel_type_int, mean_y))
        else:
            from .cpu_functions import cpu_loo_mse
            # float() for consistency with the GPU branch and the -> float
            # annotation (the C helper may hand back a 0-d numpy scalar).
            return float(cpu_loo_mse(training_dependent, training_features, precision,
                                     self._kernel_type_int, mean_y))

    def predict_with_variance(
        self,
        training_dependent: np.ndarray,
        training_features: np.ndarray,
        prediction_features: np.ndarray,
        precision: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        EXPERIMENTAL. Nadaraya-Watson prediction with local variance estimation based on
        Var(Y|X) = E[Y²|X] - prediction².

        Args:
            training_dependent: (n_train,) or (n_train, 1)
            training_features: (n_train, n_features)
            prediction_features: (n_pred, n_features)
            precision: precision array

        Returns:
            Tuple of (predictions, variances) — see the backend helpers.
        """
        self._check_memory(
            prediction_features.shape[0], training_features.shape[0],
            "predict_with_variance"
        )
        if self.use_gpu:
            from .gpu_functions import cuda_predict_with_variance
            return cuda_predict_with_variance(
                training_dependent, training_features,
                prediction_features, precision,
                self._kernel_type_int
            )
        else:
            from .cpu_functions import cpu_predict_with_variance
            return cpu_predict_with_variance(
                training_dependent, training_features,
                prediction_features, precision,
                self._kernel_type_int
            )

    def get_weights(
        self,
        training_features: np.ndarray,
        prediction_features: np.ndarray,
        precision: np.ndarray
    ) -> np.ndarray:
        """
        Returns kernel weight matrix (n_pred, n_train).

        Args:
            training_features: (n_train, n_features)
            prediction_features: (n_pred, n_features)
            precision: precision array
        """
        self._check_memory(
            prediction_features.shape[0], training_features.shape[0],
            "get_weights"
        )
        if self.use_gpu:
            from .gpu_functions import cuda_get_weights
            return cuda_get_weights(
                training_features, prediction_features, precision,
                self._kernel_type_int
            )
        else:
            from .cpu_functions import cpu_get_weights
            return cpu_get_weights(
                training_features, prediction_features, precision,
                self._kernel_type_int
            )

    def similarity(self, prediction_features: np.ndarray,
                   training_features: np.ndarray,
                   precision: np.ndarray) -> np.ndarray:
        """
        Compute kernel similarity scores.

        Args:
            prediction_features: (n_pred, n_features)
            training_features: (n_train, n_features)
            precision: precision array

        Returns:
            Similarity scores from the active backend's kernel helper.
        """
        self._check_memory(
            prediction_features.shape[0], training_features.shape[0], "similarity"
        )
        if self.use_gpu:
            from .gpu_functions import cuda_similarity
            return cuda_similarity(prediction_features, training_features, precision,
                                   self._kernel_type_int)
        else:
            from .cpu_functions import cpu_similarity
            return cpu_similarity(prediction_features, training_features, precision,
                                  self._kernel_type_int)
|