gfdl 0.1.0__py3-none-any.whl
- gfdl/__init__.py +36 -0
- gfdl/activations.py +194 -0
- gfdl/model.py +851 -0
- gfdl/tests/__init__.py +0 -0
- gfdl/tests/test_model.py +518 -0
- gfdl/tests/test_regression.py +142 -0
- gfdl/weights.py +378 -0
- gfdl-0.1.0.dist-info/METADATA +33 -0
- gfdl-0.1.0.dist-info/RECORD +12 -0
- gfdl-0.1.0.dist-info/WHEEL +5 -0
- gfdl-0.1.0.dist-info/licenses/COPYING +29 -0
- gfdl-0.1.0.dist-info/top_level.txt +1 -0
gfdl/model.py
ADDED
@@ -0,0 +1,851 @@
"""
Estimators for gradient-free deep learning.
"""

import numpy as np
from scipy.special import logsumexp
from scipy.stats import mode
from sklearn.base import (
    BaseEstimator,
    ClassifierMixin,
    MultiOutputMixin,
    RegressorMixin,
)
from sklearn.linear_model import Ridge
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils.metaestimators import available_if
from sklearn.utils.multiclass import unique_labels
from sklearn.utils.validation import check_is_fitted, validate_data

from gfdl.activations import resolve_activation
from gfdl.weights import resolve_weight


class GFDL(BaseEstimator):
    """Base class for GFDL for classification and regression."""
    def __init__(
        self,
        hidden_layer_sizes: np.typing.ArrayLike = (100,),
        activation: str = "identity",
        weight_scheme: str = "uniform",
        direct_links: bool = True,
        seed: int | None = None,
        reg_alpha: float | None = None,
        rtol: float | None = None,
    ):
        self.hidden_layer_sizes = hidden_layer_sizes
        self.activation = activation
        self.direct_links = direct_links
        self.seed = seed
        self.weight_scheme = weight_scheme
        self.reg_alpha = reg_alpha
        self.rtol = rtol

    def fit(self, X, Y):
        # Assumption: X, Y have been pre-processed.
        # X shape: (n_samples, n_features)
        # Y shape: (n_samples, n_outputs)
        if self.reg_alpha is not None and self.reg_alpha < 0.0:
            raise ValueError("Negative reg_alpha. Expected range: None or [0.0, inf).")
        fn = resolve_activation(self.activation)[1]
        self._activation_fn = fn
        self._N = X.shape[1]
        hidden_layer_sizes = np.asarray(self.hidden_layer_sizes)
        self._weight_mode = resolve_weight(self.weight_scheme)
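
        # The hidden-layer weights and biases below are drawn once from the
        # chosen weight scheme and never updated afterwards; learning is
        # "gradient-free" because only the output weights are solved for.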
        # weights shape: (n_layers,)
        # biases shape: (n_layers,)
        self.W_ = []
        self.b_ = []
        rng = self.get_generator(self.seed)

        self.W_.append(
            self._weight_mode(
                self._N, hidden_layer_sizes[0], rng=self.get_generator(self.seed)
            )
        )
        self.b_.append(
            self._weight_mode(1, hidden_layer_sizes[0], rng=rng).reshape(-1)
        )
        for i, layer in enumerate(hidden_layer_sizes[1:]):
            # (n_hidden_i, n_hidden_{i-1})
            self.W_.append(
                self._weight_mode(hidden_layer_sizes[i], layer, rng=rng)
            )
            # (n_hidden_i,)
            self.b_.append(
                self._weight_mode(1, layer, rng=rng).reshape(-1)
            )

        # hidden representations, one per layer
        Hs = []
        H_prev = X
        for w, b in zip(self.W_, self.b_, strict=False):
            Z = H_prev @ w.T + b  # (n_samples, n_hidden)
            H_prev = self._activation_fn(Z)
            Hs.append(H_prev)

        # design matrix shape: (n_samples, sum_hidden+n_features)
        # or (n_samples, sum_hidden)
        if self.direct_links:
            Hs.append(X)
        D = np.hstack(Hs)
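
        # With the design matrix D fixed, fitting reduces to linear least
        # squares: coeff_ = pinv(D) @ Y is the minimum-norm solution of
        # D @ coeff = Y, and the ridge branch solves the l2-regularized
        # variant of the same problem.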
        # beta shape: (sum_hidden+n_features, n_outputs)
        # or (sum_hidden, n_outputs)

        # If reg_alpha is None, use a direct solve via the Moore-Penrose
        # pseudo-inverse, otherwise use the ridge-regularized form.
        if self.reg_alpha is None:
            self.coeff_ = np.linalg.pinv(D, rtol=self.rtol) @ Y
        else:
            ridge = Ridge(alpha=self.reg_alpha, fit_intercept=False)
            ridge.fit(D, Y)
            self.coeff_ = ridge.coef_.T
        return self

    def predict(self, X):
        check_is_fitted(self)
        Hs = []
        H_prev = X
        for W, b in zip(self.W_, self.b_, strict=False):
            Z = H_prev @ W.T + b  # (n_samples, n_hidden)
            H_prev = self._activation_fn(Z)
            Hs.append(H_prev)

        if self.direct_links:
            Hs.append(X)
        D = np.hstack(Hs)
        out = D @ self.coeff_

        return out

    def get_generator(self, seed):
        return np.random.default_rng(seed)

class GFDLClassifier(ClassifierMixin, GFDL):
    """
    Random vector functional link network classifier.

    This model fits a feedforward neural network with fixed random
    hidden-layer parameters and solves for the output weights using linear
    least squares or ridge regression. When direct links are disabled, the
    model corresponds to an Extreme Learning Machine (ELM) architecture.

    Parameters
    ----------
    hidden_layer_sizes : array-like of shape (n_layers,), default=(100,)
        The ith element represents the number of neurons in the ith
        hidden layer.

    activation : str, default='identity'
        Activation function for the hidden layers.

        - 'identity': no-op activation, useful to implement a linear
          bottleneck, returns f(x) = x.

        - 'tanh': :func:`tanh <gfdl.activations.tanh>`.

        - 'relu': :func:`relu <gfdl.activations.relu>`.

        - 'sigmoid': :func:`sigmoid <gfdl.activations.sigmoid>`.

        - 'softmax': :func:`softmax <gfdl.activations.softmax>`.

        - 'softmin': :func:`softmin <gfdl.activations.softmin>`.

        - 'log_sigmoid': :func:`log_sigmoid <gfdl.activations.log_sigmoid>`.

        - 'log_softmax': :func:`log_softmax <gfdl.activations.log_softmax>`.

    weight_scheme : str, default='uniform'
        Distribution used to initialize the random hidden-layer weights.

        The initialization functions generate weight matrices of shape
        (n_hidden_units, n_features), where values are drawn
        according to the selected scheme.

        - 'zeros': set weights to zeros (:func:`zeros <gfdl.weights.zeros>`).

        - 'range': set weights to a normalized np.arange
          (:func:`range <gfdl.weights.range>`).

        - 'uniform': uniform distribution (:func:`uniform <gfdl.weights.uniform>`).

        - 'he_uniform': He uniform distribution
          (:func:`he_uniform <gfdl.weights.he_uniform>`).

        - 'lecun_uniform': LeCun uniform distribution
          (:func:`lecun_uniform <gfdl.weights.lecun_uniform>`).

        - 'glorot_uniform': Glorot uniform distribution
          (:func:`glorot_uniform <gfdl.weights.glorot_uniform>`).

        - 'normal': normal distribution (:func:`normal <gfdl.weights.normal>`).

        - 'he_normal': He normal distribution
          (:func:`he_normal <gfdl.weights.he_normal>`).

        - 'lecun_normal': LeCun normal distribution
          (:func:`lecun_normal <gfdl.weights.lecun_normal>`).

        - 'glorot_normal': Glorot normal distribution
          (:func:`glorot_normal <gfdl.weights.glorot_normal>`).

    direct_links : bool, default=True
        Whether to connect the input layer to the output nodes.
        When set to False, only the hidden-layer activations are used,
        corresponding to the Extreme Learning Machine (ELM) architecture.

    seed : int, default=None
        Determines random number generation for weight and bias
        initialization.
        Pass an int for reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.

    reg_alpha : float, default=None
        Amount of ridge shrinkage to apply in order to improve
        conditioning during ridge regression. When set to `None`, the
        model uses a direct solve via the Moore-Penrose pseudo-inverse.

    rtol : float, default=None
        Cutoff for small singular values in the Moore-Penrose
        pseudo-inverse. Only applies when ``reg_alpha=None``.
        When ``rtol=None``, the array API standard default for
        ``pinv`` is used.

    Attributes
    ----------
    n_features_in_ : int
        Number of features seen during :term:`fit`.

    classes_ : ndarray or list of ndarray of shape (n_classes,)
        Class labels for each output.

    W_ : list of ndarray of length n_layers
        Weight matrices of the hidden layers. The ith element of the list is
        the weight matrix of layer i.

    b_ : list of ndarray of length n_layers
        Bias vectors of the hidden layers. The ith element of the list is the
        bias vector of layer i.

    coeff_ : ndarray of shape (n_features_out, n_outputs)
        Output weight matrix learned by the fit method.

    See Also
    --------
    GFDLRegressor : Regressor variant for the RVFL architecture.

    Examples
    --------
    >>> from gfdl.model import GFDLClassifier
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.model_selection import train_test_split
    >>> X, y = make_classification(n_samples=100, random_state=1)
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,
    ...                                                     random_state=1)
    >>> clf = GFDLClassifier(seed=1).fit(X_train, y_train)
    >>> clf.predict_proba(X_test[:1])
    array([[0.46123716, 0.53876284]])
    >>> clf.predict(X_test[:5, :])
    array([1, 0, 1, 0, 1])
    """
    def __init__(
        self,
        hidden_layer_sizes: np.typing.ArrayLike = (100,),
        activation: str = "identity",
        weight_scheme: str = "uniform",
        direct_links: bool = True,
        seed: int | None = None,
        reg_alpha: float | None = None,
        rtol: float | None = None,
    ):
        super().__init__(hidden_layer_sizes=hidden_layer_sizes,
                         activation=activation,
                         weight_scheme=weight_scheme,
                         direct_links=direct_links,
                         seed=seed,
                         reg_alpha=reg_alpha,
                         rtol=rtol)

    def fit(self, X, y):
        """
        Build a gradient-free neural network from the training set (X, y).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The training input samples.
        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            The target values (class labels).

        Returns
        -------
        object
            Fitted estimator.
        """
        # shape: (n_samples, n_features)
        X, Y = validate_data(self, X, y)
        self.classes_ = unique_labels(Y)

        # One-hot encode y
        # (this is necessary for everything beyond binary classification).
        self.enc_ = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
        # shape: (n_samples, n_classes)
        Y = self.enc_.fit_transform(Y.reshape(-1, 1))

        # call base fit method
        super().fit(X, Y)
        return self

    def predict(self, X):
        """
        Predict class for X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        ndarray
            The predicted classes, with shape (n_samples,) or
            (n_samples, n_outputs).
        """
        check_is_fitted(self)
        X = validate_data(self, X, reset=False)
        out = self.predict_proba(X)
        y_hat = self.classes_[np.argmax(out, axis=1)]
        return y_hat

    def predict_proba(self, X):
        """
        Predict class probabilities for X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        ndarray
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute ``classes_``. The
            ndarray has shape (n_samples, n_classes).
        """
        check_is_fitted(self)
        X = validate_data(self, X, reset=False)
        out = super().predict(X)
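        # Numerically stable softmax: subtracting logsumexp before
        # exponentiating turns the raw linear scores into probabilities
        # without risk of overflow.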
        out = np.exp(out - logsumexp(out, axis=1, keepdims=True))
        return out


class EnsembleGFDL(GFDL):
    """Base class for ensemble GFDL models for classification and regression."""
    def __init__(
        self,
        hidden_layer_sizes: np.typing.ArrayLike = (100,),
        activation: str = "identity",
        weight_scheme: str = "uniform",
        seed: int | None = None,
        reg_alpha: float | None = None,
        rtol: float | None = None,
    ):
        super().__init__(hidden_layer_sizes=hidden_layer_sizes,
                         activation=activation,
                         weight_scheme=weight_scheme,
                         direct_links=True,
                         seed=seed,
                         reg_alpha=reg_alpha,
                         rtol=rtol)

    def fit(self, X, Y):
        if self.reg_alpha is not None and self.reg_alpha < 0.0:
            raise ValueError("Negative reg_alpha. Expected range: None or [0.0, inf).")

        fn = resolve_activation(self.activation)[1]
        self._activation_fn = fn
        self._N = X.shape[1]
        hidden_layer_sizes = np.asarray(self.hidden_layer_sizes)
        self._weight_mode = resolve_weight(self.weight_scheme)

        self.W_ = []
        self.b_ = []
        rng = self.get_generator(self.seed)

        self.W_.append(
            self._weight_mode(
                self._N, hidden_layer_sizes[0], rng=self.get_generator(self.seed)
            )
        )
        self.b_.append(
            self._weight_mode(1, hidden_layer_sizes[0], rng=rng).reshape(-1)
        )

        for i, layer in enumerate(hidden_layer_sizes[1:]):
            # (n_hidden_i, n_hidden_{i-1}+n_features)
            self.W_.append(
                self._weight_mode(hidden_layer_sizes[i] + self._N, layer, rng=rng)
            )
            # (n_hidden_i,)
            self.b_.append(
                self._weight_mode(1, layer, rng=rng).reshape(-1)
            )

        self.coeffs_ = []
        D = X
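
        # Each hidden layer acts as one ensemble member: its design matrix
        # [H_i, X] (hidden activations plus direct links) gets its own output
        # weights, following the ensemble deep RVFL scheme of Shi et al.
        # cited in the EnsembleGFDLClassifier docstring.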
        for W, b in zip(self.W_, self.b_, strict=False):
            Z = D @ W.T + b  # (n_samples, n_hidden_layer_i)
            H = self._activation_fn(Z)
            # design matrix shape: (n_samples, n_hidden_layer_i+n_features)
            D = np.hstack((H, X))

            # beta shape: (n_hidden_layer_i+n_features, n_classes)

            # If reg_alpha is None, use a direct solve via the Moore-Penrose
            # pseudo-inverse, otherwise use the ridge-regularized form.
            if self.reg_alpha is None:
                coeff = np.linalg.pinv(D, rtol=self.rtol) @ Y
            else:
                ridge = Ridge(alpha=self.reg_alpha, fit_intercept=False)
                ridge.fit(D, Y)
                coeff = ridge.coef_.T
            self.coeffs_.append(coeff)

        return self

    def _forward(self, X):
        check_is_fitted(self)
        outs = []

        D = X
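        # Re-run the cascade, collecting one score matrix per ensemble member
        # (one per hidden layer) for soft or hard voting downstream.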
        for W, b, coeff in zip(self.W_, self.b_, self.coeffs_, strict=True):
            Z = D @ W.T + b
            H = self._activation_fn(Z)

            D = np.hstack((H, X))

            out = D @ coeff
            outs.append(out)

        return outs


class EnsembleGFDLClassifier(ClassifierMixin, EnsembleGFDL):
    """
    Ensemble random vector functional link network classifier.

    Parameters
    ----------
    hidden_layer_sizes : array-like of shape (n_layers,), default=(100,)
        The ith element represents the number of neurons in the ith
        hidden layer.

    activation : str, default='identity'
        Activation function for the hidden layers.

        - 'identity': no-op activation, useful to implement a linear
          bottleneck, returns f(x) = x.

        - 'tanh': :func:`tanh <gfdl.activations.tanh>`.

        - 'relu': :func:`relu <gfdl.activations.relu>`.

        - 'sigmoid': :func:`sigmoid <gfdl.activations.sigmoid>`.

        - 'softmax': :func:`softmax <gfdl.activations.softmax>`.

        - 'softmin': :func:`softmin <gfdl.activations.softmin>`.

        - 'log_sigmoid': :func:`log_sigmoid <gfdl.activations.log_sigmoid>`.

        - 'log_softmax': :func:`log_softmax <gfdl.activations.log_softmax>`.

    weight_scheme : str, default='uniform'
        Distribution used to initialize the random hidden-layer weights.

        The initialization functions generate weight matrices of shape
        (n_hidden_units, n_features), where values are drawn
        according to the selected scheme.

        - 'zeros': set weights to zeros (:func:`zeros <gfdl.weights.zeros>`).

        - 'range': set weights to a normalized np.arange
          (:func:`range <gfdl.weights.range>`).

        - 'uniform': uniform distribution (:func:`uniform <gfdl.weights.uniform>`).

        - 'he_uniform': He uniform distribution
          (:func:`he_uniform <gfdl.weights.he_uniform>`).

        - 'lecun_uniform': LeCun uniform distribution
          (:func:`lecun_uniform <gfdl.weights.lecun_uniform>`).

        - 'glorot_uniform': Glorot uniform distribution
          (:func:`glorot_uniform <gfdl.weights.glorot_uniform>`).

        - 'normal': normal distribution (:func:`normal <gfdl.weights.normal>`).

        - 'he_normal': He normal distribution
          (:func:`he_normal <gfdl.weights.he_normal>`).

        - 'lecun_normal': LeCun normal distribution
          (:func:`lecun_normal <gfdl.weights.lecun_normal>`).

        - 'glorot_normal': Glorot normal distribution
          (:func:`glorot_normal <gfdl.weights.glorot_normal>`).

    seed : int, default=None
        Random seed used to initialize the network.

    reg_alpha : float, default=None
        When `None`, use Moore-Penrose inversion to solve for the output
        weights of the network. Otherwise, it specifies the constant that
        multiplies the L2 term of `sklearn` `Ridge`, controlling the
        regularization strength. `reg_alpha` must be a non-negative float.

    rtol : float, default=None
        Cutoff for small singular values in the Moore-Penrose
        pseudo-inverse. Only applies when ``reg_alpha=None``.
        When ``rtol=None``, the array API standard default for
        ``pinv`` is used.

    voting : str, default="soft"
        Whether to use soft or hard voting in the ensemble.

    Notes
    -----
    The implementation is based on the one described by Shi et al. in [1]_.

    References
    ----------
    .. [1] Shi, Katuwal, Suganthan, Tanveer, "Random vector functional
       link neural network based ensemble deep learning." Pattern Recognition,
       vol. 117, pp. 107978, 2021, https://doi.org/10.1016/j.patcog.2021.107978.

    Examples
    --------
    >>> from sklearn.datasets import make_classification
    >>> from gfdl.model import EnsembleGFDLClassifier
    >>> X, y = make_classification(n_samples=1000, n_features=4,
    ...                            n_informative=2, n_redundant=0,
    ...                            random_state=0, shuffle=False)
    >>> clf = EnsembleGFDLClassifier(seed=0)
    >>> clf.fit(X, y)
    EnsembleGFDLClassifier(seed=0)
    >>> print(clf.predict([[0, 0, 0, 0]]))
    [1]
    """
    def __init__(
        self,
        hidden_layer_sizes: np.typing.ArrayLike = (100,),
        activation: str = "identity",
        weight_scheme: str = "uniform",
        seed: int | None = None,
        reg_alpha: float | None = None,
        rtol: float | None = None,
        voting: str = "soft",  # "soft" or "hard"
    ):
        super().__init__(hidden_layer_sizes=hidden_layer_sizes,
                         activation=activation,
                         weight_scheme=weight_scheme,
                         seed=seed,
                         reg_alpha=reg_alpha,
                         rtol=rtol)
        self.voting = voting

    def fit(self, X, y):
        """
        Train the ensemble of connected RVFL networks on the training set (X, y).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The training input samples.
        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            The target values.

        Returns
        -------
        object
            The fitted estimator.
        """
        # shape: (n_samples, n_features)
        X, Y = validate_data(self, X, y)
        self.classes_ = unique_labels(Y)

        # One-hot encode y
        # (this is necessary for everything beyond binary classification).
        self.enc_ = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
        # shape: (n_samples, n_classes)
        Y = self.enc_.fit_transform(Y.reshape(-1, 1))

        # call base fit method
        super().fit(X, Y)
        return self

    def _check_voting(self):
        if self.voting == "hard":
            raise AttributeError(
                f"predict_proba is not available when voting={self.voting!r}"
            )
        return True

    @available_if(_check_voting)
    def predict_proba(self, X):
        """
        Predict class probabilities for X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        ndarray
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute ``classes_``. The
            ndarray has shape (n_samples, n_classes).
        """
        check_is_fitted(self)
        X = validate_data(self, X, reset=False)

        outs = self._forward(X)
        probs = []
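
        # Soft voting: convert each member's scores to probabilities with a
        # stable softmax, then average the probability estimates.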
        for out in outs:
            p = np.exp(out - logsumexp(out, axis=1, keepdims=True))
            probs.append(p)

        return np.mean(probs, axis=0)

    def predict(self, X):
        """
        Predict class for X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        ndarray
            The predicted classes, with shape (n_samples,) or
            (n_samples, n_outputs).
        """
        check_is_fitted(self)
        X = validate_data(self, X, reset=False)

        if self.voting == "soft":
            P = self.predict_proba(X)
            return self.classes_[np.argmax(P, axis=1)]

        outs = self._forward(X)
        votes = []

        for out in outs:
            p = np.exp(out - logsumexp(out, axis=1, keepdims=True))
            votes.append(self.classes_[np.argmax(p, axis=1)])

        votes = np.stack(votes, axis=1)
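        # Hard voting: each member casts one vote per sample and the majority
        # class wins; scipy.stats.mode breaks ties by returning the smallest
        # label among the most common ones.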
        m = mode(votes, axis=1, keepdims=False)
        return m.mode


class GFDLRegressor(RegressorMixin, MultiOutputMixin, GFDL):
    """
    Random vector functional link network regressor.

    This model fits a feedforward neural network with fixed random
    hidden-layer parameters and solves for the output weights using linear
    least squares or ridge regression. When direct links are disabled, the
    model corresponds to an Extreme Learning Machine (ELM) architecture.

    Parameters
    ----------
    hidden_layer_sizes : array-like of shape (n_layers,), default=(100,)
        The ith element represents the number of neurons in the ith
        hidden layer.

    activation : str, default='identity'
        Activation function for the hidden layers.

        - 'identity': no-op activation, useful to implement a linear
          bottleneck, returns f(x) = x.

        - 'tanh': :func:`tanh <gfdl.activations.tanh>`.

        - 'relu': :func:`relu <gfdl.activations.relu>`.

        - 'sigmoid': :func:`sigmoid <gfdl.activations.sigmoid>`.

        - 'softmax': :func:`softmax <gfdl.activations.softmax>`.

        - 'softmin': :func:`softmin <gfdl.activations.softmin>`.

        - 'log_sigmoid': :func:`log_sigmoid <gfdl.activations.log_sigmoid>`.

        - 'log_softmax': :func:`log_softmax <gfdl.activations.log_softmax>`.

    weight_scheme : str, default='uniform'
        Distribution used to initialize the random hidden-layer weights.

        The initialization functions generate weight matrices of shape
        (n_hidden_units, n_features), where values are drawn
        according to the selected scheme.

        - 'zeros': set weights to zeros (:func:`zeros <gfdl.weights.zeros>`).

        - 'range': set weights to a normalized np.arange
          (:func:`range <gfdl.weights.range>`).

        - 'uniform': uniform distribution (:func:`uniform <gfdl.weights.uniform>`).

        - 'he_uniform': He uniform distribution
          (:func:`he_uniform <gfdl.weights.he_uniform>`).

        - 'lecun_uniform': LeCun uniform distribution
          (:func:`lecun_uniform <gfdl.weights.lecun_uniform>`).

        - 'glorot_uniform': Glorot uniform distribution
          (:func:`glorot_uniform <gfdl.weights.glorot_uniform>`).

        - 'normal': normal distribution (:func:`normal <gfdl.weights.normal>`).

        - 'he_normal': He normal distribution
          (:func:`he_normal <gfdl.weights.he_normal>`).

        - 'lecun_normal': LeCun normal distribution
          (:func:`lecun_normal <gfdl.weights.lecun_normal>`).

        - 'glorot_normal': Glorot normal distribution
          (:func:`glorot_normal <gfdl.weights.glorot_normal>`).

    direct_links : bool, default=True
        Whether to connect the input layer to the output nodes.

        When set to False, only the hidden-layer activations are used,
        corresponding to the Extreme Learning Machine (ELM) architecture.

    seed : int, default=None
        Determines random number generation for weight and bias
        initialization.
        Pass an int for reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.

    reg_alpha : float, default=None
        Amount of ridge shrinkage to apply in order to improve
        conditioning during ridge regression. When set to `None`, the
        model uses a direct solve via the Moore-Penrose pseudo-inverse.

    rtol : float, default=None
        Cutoff for small singular values in the Moore-Penrose
        pseudo-inverse. Only applies when ``reg_alpha=None``.
        When ``rtol=None``, the array API standard default for
        ``pinv`` is used.

    Attributes
    ----------
    n_features_in_ : int
        Number of features seen during :term:`fit`.

    W_ : list of ndarray of length n_layers
        Weight matrices of the hidden layers. The ith element of the list is
        the weight matrix of layer i.

    b_ : list of ndarray of length n_layers
        Bias vectors of the hidden layers. The ith element of the list is the
        bias vector of layer i.

    coeff_ : ndarray of shape (n_features_out, n_outputs)
        Output weight matrix learned by the fit method.

    See Also
    --------
    GFDLClassifier : Classifier variant for the RVFL architecture.

    Examples
    --------
    >>> from gfdl.model import GFDLRegressor
    >>> from sklearn.datasets import make_regression
    >>> from sklearn.model_selection import train_test_split
    >>> X, y = make_regression(n_samples=200, n_features=20, random_state=1)
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,
    ...                                                     random_state=1)
    >>> regr = GFDLRegressor(seed=1)
    >>> regr.fit(X_train, y_train)
    GFDLRegressor(seed=1)
    >>> regr.predict(X_test[:2])
    array([ 18.368, -278.014])
    """
    def __init__(
        self,
        hidden_layer_sizes: np.typing.ArrayLike = (100,),
        activation: str = "identity",
        weight_scheme: str = "uniform",
        direct_links: bool = True,
        seed: int | None = None,
        reg_alpha: float | None = None,
        rtol: float | None = None,
    ):
        super().__init__(hidden_layer_sizes=hidden_layer_sizes,
                         activation=activation,
                         weight_scheme=weight_scheme,
                         direct_links=direct_links,
                         seed=seed,
                         reg_alpha=reg_alpha,
                         rtol=rtol)

    def fit(self, X, y):
        """
        Train the gradient-free neural network on the training set (X, y).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The training input samples.
        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            The target values.

        Returns
        -------
        object
            The fitted estimator.
        """
        X, Y = validate_data(self, X, y, multi_output=True)
        super().fit(X, Y)
        return self

    def predict(self, X):
        """
        Predict regression target for X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        ndarray
            The predicted values, with shape (n_samples,) or
            (n_samples, n_outputs).
        """
        check_is_fitted(self)
        X = validate_data(self, X, reset=False)
        return super().predict(X)
|